From 3b54590563b2b690b1ecf2329a8aae2258e27986 Mon Sep 17 00:00:00 2001 From: ilopezluna Date: Tue, 8 Jul 2025 21:50:09 +0200 Subject: [PATCH 1/3] Add support for Multimodal projector file --- builder/builder.go | 11 +++++++++++ internal/partial/partial.go | 20 ++++++++++++++------ internal/store/model.go | 4 ++++ types/config.go | 3 +++ types/model.go | 1 + 5 files changed, 33 insertions(+), 6 deletions(-) diff --git a/builder/builder.go b/builder/builder.go index e23d7f6..871e462 100644 --- a/builder/builder.go +++ b/builder/builder.go @@ -44,6 +44,17 @@ func (b *Builder) WithContextSize(size uint64) *Builder { } } +// WithMultimodalProjector adds a Multimodal projector file to the artifact +func (b *Builder) WithMultimodalProjector(path string) (*Builder, error) { + mmprojLayer, err := partial.NewLayer(path, types.MediaTypeMultimodalProjector) + if err != nil { + return nil, fmt.Errorf("mmproj layer from %q: %w", path, err) + } + return &Builder{ + model: mutate.AppendLayers(b.model, mmprojLayer), + }, nil +} + // Target represents a build target type Target interface { Write(context.Context, types.ModelArtifact, io.Writer) error diff --git a/internal/partial/partial.go b/internal/partial/partial.go index e31814c..3def14a 100644 --- a/internal/partial/partial.go +++ b/internal/partial/partial.go @@ -7,7 +7,6 @@ import ( v1 "github.com/google/go-containerregistry/pkg/v1" "github.com/google/go-containerregistry/pkg/v1/partial" ggcr "github.com/google/go-containerregistry/pkg/v1/types" - "github.com/pkg/errors" "github.com/docker/model-distribution/types" ) @@ -67,22 +66,31 @@ type WithLayers interface { } func GGUFPath(i WithLayers) (string, error) { + return layerPathByMediaType(i, types.MediaTypeGGUF) +} + +func MMPROJPath(i WithLayers) (string, error) { + return layerPathByMediaType(i, types.MediaTypeMultimodalProjector) +} + +// layerPathByMediaType is a generic helper function that finds a layer by media type and returns its path +func layerPathByMediaType(i WithLayers, mediaType ggcr.MediaType) (string, error) { layers, err := i.Layers() if err != nil { return "", fmt.Errorf("get layers: %w", err) } for _, l := range layers { mt, err := l.MediaType() - if err != nil || mt != types.MediaTypeGGUF { + if err != nil || mt != mediaType { continue } - ggufLayer, ok := l.(*Layer) + layer, ok := l.(*Layer) if !ok { - return "", errors.New("gguf Layer is not available locally") + return "", fmt.Errorf("%s Layer is not available locally", mediaType) } - return ggufLayer.Path, nil + return layer.Path, nil } - return "", errors.New("model does not contain a GGUF layer") + return "", fmt.Errorf("model does not contain a %s layer", mediaType) } func ManifestForLayers(i WithLayers) (*v1.Manifest, error) { diff --git a/internal/store/model.go b/internal/store/model.go index 85eba02..e48a386 100644 --- a/internal/store/model.go +++ b/internal/store/model.go @@ -114,6 +114,10 @@ func (m *Model) GGUFPath() (string, error) { return mdpartial.GGUFPath(m) } +func (m *Model) MMPROJPath() (string, error) { + return mdpartial.MMPROJPath(m) +} + func (m *Model) Tags() []string { return m.tags } diff --git a/types/config.go b/types/config.go index dbaf2ea..e23a53c 100644 --- a/types/config.go +++ b/types/config.go @@ -21,6 +21,9 @@ const ( // MediaTypeLicense indicates a plain text file containing a license MediaTypeLicense = types.MediaType("application/vnd.docker.ai.license") + // MediaTypeMultimodalProjector indicates a Multimodal projector file + MediaTypeMultimodalProjector = types.MediaType("application/vnd.docker.ai.mmproj") + FormatGGUF = Format("gguf") ) diff --git a/types/model.go b/types/model.go index 575a7d0..7a592ba 100644 --- a/types/model.go +++ b/types/model.go @@ -7,6 +7,7 @@ import ( type Model interface { ID() (string, error) GGUFPath() (string, error) + MMPROJPath() (string, error) Config() (Config, error) Tags() []string Descriptor() (Descriptor, error) From e9dc3ac5f1647441089c5f2a66553f9644186dc5 Mon Sep 17 00:00:00 2001 From: ilopezluna Date: Tue, 8 Jul 2025 22:30:33 +0200 Subject: [PATCH 2/3] Add tests for Multimodal support --- assets/dummy.mmproj | 2 + builder/builder.go | 5 ++ builder/builder_test.go | 128 +++++++++++++++++++++++++++ internal/partial/partial_test.go | 116 ++++++++++++++++++++++++ internal/store/store_test.go | 117 ++++++++++++++++++++++++ internal/store/testdata/dummy.mmproj | 2 + types/config.go | 5 -- 7 files changed, 370 insertions(+), 5 deletions(-) create mode 100644 assets/dummy.mmproj create mode 100644 builder/builder_test.go create mode 100644 internal/partial/partial_test.go create mode 100644 internal/store/testdata/dummy.mmproj diff --git a/assets/dummy.mmproj b/assets/dummy.mmproj new file mode 100644 index 0000000..4a56bf7 --- /dev/null +++ b/assets/dummy.mmproj @@ -0,0 +1,2 @@ +This is a dummy multimodal projector file for testing purposes. +It contains sample content to simulate a real multimodal projector file. diff --git a/builder/builder.go b/builder/builder.go index 871e462..dab9db5 100644 --- a/builder/builder.go +++ b/builder/builder.go @@ -60,6 +60,11 @@ type Target interface { Write(context.Context, types.ModelArtifact, io.Writer) error } +// Model returns the underlying model artifact for testing purposes +func (b *Builder) Model() types.ModelArtifact { + return b.model +} + // Build finalizes the artifact and writes it to the given target, reporting progress to the given writer func (b *Builder) Build(ctx context.Context, target Target, pw io.Writer) error { return target.Write(ctx, b.model, pw) diff --git a/builder/builder_test.go b/builder/builder_test.go new file mode 100644 index 0000000..2cffafe --- /dev/null +++ b/builder/builder_test.go @@ -0,0 +1,128 @@ +package builder_test + +import ( + "path/filepath" + "testing" + + "github.com/docker/model-distribution/builder" + "github.com/docker/model-distribution/types" +) + +func TestWithMultimodalProjector(t *testing.T) { + // Create a builder from a GGUF file + b, err := builder.FromGGUF(filepath.Join("..", "assets", "dummy.gguf")) + if err != nil { + t.Fatalf("Failed to create builder from GGUF: %v", err) + } + + // Add multimodal projector + b2, err := b.WithMultimodalProjector(filepath.Join("..", "assets", "dummy.mmproj")) + if err != nil { + t.Fatalf("Failed to add multimodal projector: %v", err) + } + + // Verify the model has the expected layers + manifest, err := b2.Model().Manifest() + if err != nil { + t.Fatalf("Failed to get manifest: %v", err) + } + + // Should have 2 layers: GGUF + multimodal projector + if len(manifest.Layers) != 2 { + t.Fatalf("Expected 2 layers, got %d", len(manifest.Layers)) + } + + // Check that one layer has the multimodal projector media type + foundMMProjLayer := false + for _, layer := range manifest.Layers { + if layer.MediaType == types.MediaTypeMultimodalProjector { + foundMMProjLayer = true + break + } + } + + if !foundMMProjLayer { + t.Error("Expected to find a layer with multimodal projector media type") + } + + // Note: We can't directly test MMPROJPath() on ModelArtifact interface + // but we can verify the layer was added with correct media type above +} + +func TestWithMultimodalProjectorInvalidPath(t *testing.T) { + // Create a builder from a GGUF file + b, err := builder.FromGGUF(filepath.Join("..", "assets", "dummy.gguf")) + if err != nil { + t.Fatalf("Failed to create builder from GGUF: %v", err) + } + + // Try to add multimodal projector with invalid path + _, err = b.WithMultimodalProjector("nonexistent/path/to/mmproj") + if err == nil { + t.Error("Expected error when adding multimodal projector with invalid path") + } +} + +func TestWithMultimodalProjectorChaining(t *testing.T) { + // Create a builder from a GGUF file + b, err := builder.FromGGUF(filepath.Join("..", "assets", "dummy.gguf")) + if err != nil { + t.Fatalf("Failed to create builder from GGUF: %v", err) + } + + // Chain multiple operations: license + multimodal projector + context size + b2, err := b.WithLicense(filepath.Join("..", "assets", "license.txt")) + if err != nil { + t.Fatalf("Failed to add license: %v", err) + } + + b3, err := b2.WithMultimodalProjector(filepath.Join("..", "assets", "dummy.mmproj")) + if err != nil { + t.Fatalf("Failed to add multimodal projector: %v", err) + } + + b4 := b3.WithContextSize(4096) + + // Verify the final model has all expected layers and properties + manifest, err := b4.Model().Manifest() + if err != nil { + t.Fatalf("Failed to get manifest: %v", err) + } + + // Should have 3 layers: GGUF + license + multimodal projector + if len(manifest.Layers) != 3 { + t.Fatalf("Expected 3 layers, got %d", len(manifest.Layers)) + } + + // Check media types - using string comparison since we can't use types.MediaType directly + expectedMediaTypes := map[string]bool{ + string(types.MediaTypeGGUF): false, + string(types.MediaTypeLicense): false, + string(types.MediaTypeMultimodalProjector): false, + } + + for _, layer := range manifest.Layers { + if _, exists := expectedMediaTypes[string(layer.MediaType)]; exists { + expectedMediaTypes[string(layer.MediaType)] = true + } + } + + for mediaType, found := range expectedMediaTypes { + if !found { + t.Errorf("Expected to find layer with media type %s", mediaType) + } + } + + // Check context size + config, err := b4.Model().Config() + if err != nil { + t.Fatalf("Failed to get config: %v", err) + } + + if config.ContextSize == nil || *config.ContextSize != 4096 { + t.Errorf("Expected context size 4096, got %v", config.ContextSize) + } + + // Note: We can't directly test GGUFPath() and MMPROJPath() on ModelArtifact interface + // but we can verify the layers were added with correct media types above +} diff --git a/internal/partial/partial_test.go b/internal/partial/partial_test.go new file mode 100644 index 0000000..47586df --- /dev/null +++ b/internal/partial/partial_test.go @@ -0,0 +1,116 @@ +package partial_test + +import ( + "path/filepath" + "testing" + + "github.com/docker/model-distribution/internal/gguf" + "github.com/docker/model-distribution/internal/mutate" + "github.com/docker/model-distribution/internal/partial" + "github.com/docker/model-distribution/types" +) + +func TestMMPROJPath(t *testing.T) { + // Create a model from GGUF file + mdl, err := gguf.NewModel(filepath.Join("..", "..", "assets", "dummy.gguf")) + if err != nil { + t.Fatalf("Failed to create model from GGUF: %v", err) + } + + // Add multimodal projector layer + mmprojLayer, err := partial.NewLayer(filepath.Join("..", "..", "assets", "dummy.mmproj"), types.MediaTypeMultimodalProjector) + if err != nil { + t.Fatalf("Failed to create multimodal projector layer: %v", err) + } + + mdlWithMMProj := mutate.AppendLayers(mdl, mmprojLayer) + + // Test MMPROJPath function + mmprojPath, err := partial.MMPROJPath(mdlWithMMProj) + if err != nil { + t.Fatalf("Failed to get multimodal projector path: %v", err) + } + + expectedPath := filepath.Join("..", "..", "assets", "dummy.mmproj") + if mmprojPath != expectedPath { + t.Errorf("Expected multimodal projector path %s, got %s", expectedPath, mmprojPath) + } +} + +func TestMMPROJPathNotFound(t *testing.T) { + // Create a model from a GGUF file without a Multimodal projector + mdl, err := gguf.NewModel(filepath.Join("..", "..", "assets", "dummy.gguf")) + if err != nil { + t.Fatalf("Failed to create model from GGUF: %v", err) + } + + // Test MMPROJPath function should return error + _, err = partial.MMPROJPath(mdl) + if err == nil { + t.Error("Expected error when getting multimodal projector path from model without multimodal projector layer") + } + + expectedErrorMsg := "model does not contain a application/vnd.docker.ai.mmproj layer" + if err.Error() != expectedErrorMsg { + t.Errorf("Expected error message %q, got %q", expectedErrorMsg, err.Error()) + } +} + +func TestGGUFPath(t *testing.T) { + // Create a model from GGUF file + mdl, err := gguf.NewModel(filepath.Join("..", "..", "assets", "dummy.gguf")) + if err != nil { + t.Fatalf("Failed to create model from GGUF: %v", err) + } + + // Test GGUFPath function + ggufPath, err := partial.GGUFPath(mdl) + if err != nil { + t.Fatalf("Failed to get GGUF path: %v", err) + } + + expectedPath := filepath.Join("..", "..", "assets", "dummy.gguf") + if ggufPath != expectedPath { + t.Errorf("Expected GGUF path %s, got %s", expectedPath, ggufPath) + } +} + +func TestLayerPathByMediaType(t *testing.T) { + // Create a model from GGUF file + mdl, err := gguf.NewModel(filepath.Join("..", "..", "assets", "dummy.gguf")) + if err != nil { + t.Fatalf("Failed to create model from GGUF: %v", err) + } + + // Add license layer + licenseLayer, err := partial.NewLayer(filepath.Join("..", "..", "assets", "license.txt"), types.MediaTypeLicense) + if err != nil { + t.Fatalf("Failed to create license layer: %v", err) + } + + // Add a Multimodal projector layer + mmprojLayer, err := partial.NewLayer(filepath.Join("..", "..", "assets", "dummy.mmproj"), types.MediaTypeMultimodalProjector) + if err != nil { + t.Fatalf("Failed to create multimodal projector layer: %v", err) + } + + mdlWithLayers := mutate.AppendLayers(mdl, licenseLayer, mmprojLayer) + + // Test that we can find each layer type + ggufPath, err := partial.GGUFPath(mdlWithLayers) + if err != nil { + t.Fatalf("Failed to get GGUF path: %v", err) + } + if ggufPath != filepath.Join("..", "..", "assets", "dummy.gguf") { + t.Errorf("Expected GGUF path to be: %s, got: %s", filepath.Join("..", "..", "assets", "dummy.gguf"), ggufPath) + } + + mmprojPath, err := partial.MMPROJPath(mdlWithLayers) + if err != nil { + t.Fatalf("Failed to get multimodal projector path: %v", err) + } + if mmprojPath != filepath.Join("..", "..", "assets", "dummy.mmproj") { + t.Errorf("Expected multimodal projector path to be: %s, got: %s", filepath.Join("..", "..", "assets", "dummy.mmproj"), mmprojPath) + } + +} diff --git a/internal/store/store_test.go b/internal/store/store_test.go index b580aef..34786b4 100644 --- a/internal/store/store_test.go +++ b/internal/store/store_test.go @@ -465,6 +465,88 @@ func containsTag(tags []string, tag string) bool { return false } +// TestStoreWithMultimodalProjector tests storing and retrieving models with multimodal projector files +func TestStoreWithMultimodalProjector(t *testing.T) { + // Create a temporary directory for the test store + tempDir, err := os.MkdirTemp("", "store-mmproj-test") + if err != nil { + t.Fatalf("Failed to create temp directory: %v", err) + } + defer os.RemoveAll(tempDir) + + // Create store + storePath := filepath.Join(tempDir, "mmproj-model-store") + s, err := store.New(store.Options{ + RootPath: storePath, + }) + if err != nil { + t.Fatalf("Failed to create store: %v", err) + } + + // Create a model with a Multimodal projector + model := newTestModelWithMultimodalProjector(t) + + // Write the model to store + if err := s.Write(model, []string{"mmproj-model:latest"}, nil); err != nil { + t.Fatalf("Write failed: %v", err) + } + + // Read the model back + readModel, err := s.Read("mmproj-model:latest") + if err != nil { + t.Fatalf("Read failed: %v", err) + } + + // Verify the model has MMPROJPath method + mmprojPath, err := readModel.MMPROJPath() + if err != nil { + t.Fatalf("Failed to get multimodal projector path: %v", err) + } + + if mmprojPath == "" { + t.Error("Expected non-empty multimodal projector path") + } + + // Verify the manifest has the correct layers + manifest, err := readModel.Manifest() + if err != nil { + t.Fatalf("Failed to get manifest: %v", err) + } + + // Should have 3 layers: GGUF + license + multimodal projector + if len(manifest.Layers) != 3 { + t.Fatalf("Expected 3 layers, got %d", len(manifest.Layers)) + } + + // Check that one layer has the multimodal projector media type + foundMMProjLayer := false + for _, layer := range manifest.Layers { + if layer.MediaType == types.MediaTypeMultimodalProjector { + foundMMProjLayer = true + break + } + } + + if !foundMMProjLayer { + t.Error("Expected to find a layer with multimodal projector media type") + } + + // Test List includes the multimodal projector file + models, err := s.List() + if err != nil { + t.Fatalf("List failed: %v", err) + } + + if len(models) != 1 { + t.Fatalf("Expected 1 model, got %d", len(models)) + } + + // Should have 4 files: GGUF blob, license blob, multimodal projector blob, and config + if len(models[0].Files) != 4 { + t.Fatalf("Expected 4 files (gguf, license, mmproj, config), got %d", len(models[0].Files)) + } +} + func newTestModel(t *testing.T) types.ModelArtifact { var mdl types.ModelArtifact var err error @@ -480,3 +562,38 @@ func newTestModel(t *testing.T) types.ModelArtifact { mdl = mutate.AppendLayers(mdl, licenseLayer) return mdl } + +func newTestModelWithMultimodalProjector(t *testing.T) types.ModelArtifact { + var mdl types.ModelArtifact + var err error + + mdl, err = gguf.NewModel(filepath.Join("testdata", "dummy.gguf")) + if err != nil { + t.Fatalf("failed to create model from gguf file: %v", err) + } + + licenseLayer, err := partial.NewLayer(filepath.Join("testdata", "license.txt"), types.MediaTypeLicense) + if err != nil { + t.Fatalf("failed to create license layer: %v", err) + } + + // Create dummy multimodal projector file for testing + tempDir, err := os.MkdirTemp("", "mmproj-test") + if err != nil { + t.Fatalf("failed to create temp directory: %v", err) + } + + mmprojPath := filepath.Join(tempDir, "dummy.mmproj") + mmprojContent := []byte("dummy multimodal projector content for testing") + if err := os.WriteFile(mmprojPath, mmprojContent, 0644); err != nil { + t.Fatalf("failed to create dummy multimodal projector file: %v", err) + } + + mmprojLayer, err := partial.NewLayer(mmprojPath, types.MediaTypeMultimodalProjector) + if err != nil { + t.Fatalf("failed to create multimodal projector layer: %v", err) + } + + mdl = mutate.AppendLayers(mdl, licenseLayer, mmprojLayer) + return mdl +} diff --git a/internal/store/testdata/dummy.mmproj b/internal/store/testdata/dummy.mmproj new file mode 100644 index 0000000..4a56bf7 --- /dev/null +++ b/internal/store/testdata/dummy.mmproj @@ -0,0 +1,2 @@ +This is a dummy multimodal projector file for testing purposes. +It contains sample content to simulate a real multimodal projector file. diff --git a/types/config.go b/types/config.go index e23a53c..8211dd2 100644 --- a/types/config.go +++ b/types/config.go @@ -1,7 +1,6 @@ package types import ( - "strings" "time" v1 "github.com/google/go-containerregistry/pkg/v1" @@ -27,10 +26,6 @@ const ( FormatGGUF = Format("gguf") ) -func IsModelConfig(mt types.MediaType) bool { - return strings.HasPrefix(string(mt), string(MediaTypeModelConfigV01)) -} - type Format string type ConfigFile struct { From 479107a46358873bc210f296fcf74c8fd42cbe83 Mon Sep 17 00:00:00 2001 From: Emily Casey Date: Tue, 8 Jul 2025 22:56:50 -0600 Subject: [PATCH 3/3] Use fake target instead of modifying builder for tests Signed-off-by: Emily Casey --- builder/builder.go | 5 ----- builder/builder_test.go | 37 +++++++++++++++++++++++++++++++------ 2 files changed, 31 insertions(+), 11 deletions(-) diff --git a/builder/builder.go b/builder/builder.go index dab9db5..871e462 100644 --- a/builder/builder.go +++ b/builder/builder.go @@ -60,11 +60,6 @@ type Target interface { Write(context.Context, types.ModelArtifact, io.Writer) error } -// Model returns the underlying model artifact for testing purposes -func (b *Builder) Model() types.ModelArtifact { - return b.model -} - // Build finalizes the artifact and writes it to the given target, reporting progress to the given writer func (b *Builder) Build(ctx context.Context, target Target, pw io.Writer) error { return target.Write(ctx, b.model, pw) diff --git a/builder/builder_test.go b/builder/builder_test.go index 2cffafe..23b8fc5 100644 --- a/builder/builder_test.go +++ b/builder/builder_test.go @@ -1,6 +1,8 @@ package builder_test import ( + "context" + "io" "path/filepath" "testing" @@ -21,8 +23,14 @@ func TestWithMultimodalProjector(t *testing.T) { t.Fatalf("Failed to add multimodal projector: %v", err) } + // Build the model + target := &fakeTarget{} + if err := b2.Build(t.Context(), target, nil); err != nil { + t.Fatalf("Failed to build model: %v", err) + } + // Verify the model has the expected layers - manifest, err := b2.Model().Manifest() + manifest, err := target.artifact.Manifest() if err != nil { t.Fatalf("Failed to get manifest: %v", err) } @@ -71,20 +79,26 @@ func TestWithMultimodalProjectorChaining(t *testing.T) { } // Chain multiple operations: license + multimodal projector + context size - b2, err := b.WithLicense(filepath.Join("..", "assets", "license.txt")) + b, err = b.WithLicense(filepath.Join("..", "assets", "license.txt")) if err != nil { t.Fatalf("Failed to add license: %v", err) } - b3, err := b2.WithMultimodalProjector(filepath.Join("..", "assets", "dummy.mmproj")) + b, err = b.WithMultimodalProjector(filepath.Join("..", "assets", "dummy.mmproj")) if err != nil { t.Fatalf("Failed to add multimodal projector: %v", err) } - b4 := b3.WithContextSize(4096) + b = b.WithContextSize(4096) + + // Build the model + target := &fakeTarget{} + if err := b.Build(t.Context(), target, nil); err != nil { + t.Fatalf("Failed to build model: %v", err) + } // Verify the final model has all expected layers and properties - manifest, err := b4.Model().Manifest() + manifest, err := target.artifact.Manifest() if err != nil { t.Fatalf("Failed to get manifest: %v", err) } @@ -114,7 +128,7 @@ func TestWithMultimodalProjectorChaining(t *testing.T) { } // Check context size - config, err := b4.Model().Config() + config, err := target.artifact.Config() if err != nil { t.Fatalf("Failed to get config: %v", err) } @@ -126,3 +140,14 @@ func TestWithMultimodalProjectorChaining(t *testing.T) { // Note: We can't directly test GGUFPath() and MMPROJPath() on ModelArtifact interface // but we can verify the layers were added with correct media types above } + +var _ builder.Target = &fakeTarget{} + +type fakeTarget struct { + artifact types.ModelArtifact +} + +func (ft *fakeTarget) Write(ctx context.Context, artifact types.ModelArtifact, writer io.Writer) error { + ft.artifact = artifact + return nil +}