feat: Add basic multimodal example

GoogleCloudPlatform · arbrown · May 6, 2024 · May 3, 2024 · May 3, 2024 · May 3, 2024
commit 7b758474dbfe325a060b5589bfb2925ffddbe81a
@@ -0,0 +1,48 @@
+// [START generativeaionvertexai_non_stream_multimodality_basic]
+const {VertexAI} = require('@google-cloud/vertexai');
+
+/**
+ * TODO(developer): Update these variables before running the sample.
+ */
+async function generateContent(
+  projectId = 'PROJECT_ID',
+  location = 'us-central1',
+  model = 'gemini-1.5-pro-preview-0409'
+) {
+  // Initialize Vertex AI
+  const vertexAI = new VertexAI({project: projectId, location: location});
+  const generativeModel = vertexAI.getGenerativeModel({model: model});
+
+  const request = {
+    contents: [
+      {
+        role: 'user',
+        parts: [
+          {text: 'Are following video and image correlated?'},
+          {
+            file_data: {
+              file_uri: 'gs://cloud-samples-data/video/animals.mp4',
+              mime_type: 'video/mp4',
+            },
+          },
+          {
+            file_data: {
+              file_uri: 'gs://generativeai-downloads/images/character.jpg',
+              mime_type: 'video/mp4',
+            },
+          },
+        ],
+      },
+    ],
+  };
+
+  const result = await generativeModel.generateContent(request);
+
+  console.log(result.response.candidates[0].content.parts[0].text);
+}
+// [END generativeaionvertexai_non_stream_multimodality_basic]
+
+generateContent(...process.argv.slice(2)).catch(err => {
+  console.error(err.message);
+  process.exitCode = 1;
+});
@@ -0,0 +1,43 @@
+// Copyright 2024 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+'use strict';
+
+const {assert} = require('chai');
+const {describe, it} = require('mocha');
+const cp = require('child_process');
+const execSync = cmd => cp.execSync(cmd, {encoding: 'utf-8'});
+
+const projectId = process.env.CAIP_PROJECT_ID;
+const location = process.env.LOCATION;
+const model = 'gemini-1.5-pro-preview-0409';
+
+describe('Generative AI Multimodal prompt', () => {
+  /**
+   * TODO(developer): Uncomment these variables before running the sample.\
+   * (Not necessary if passing values as arguments)
+   */
+  // const projectId = 'YOUR_PROJECT_ID';
+  // const location = 'YOUR_LOCATION';
+  // const model = 'gemini-1.0-pro';
+
+  it('should generate text based on a prompt containing text, a video, and an image', async () => {
+    const output = execSync(
+      `node ./inference/nonStreamTextBasic.js ${projectId} ${location} ${model}`
+    );
+
+    // Assert that the correct prompt was issued
+    assert(output.length > 0);
+  });
+});