Skip to content

Commit

Permalink
Merge branch 'main' into classify-upd-2
Browse files Browse the repository at this point in the history
  • Loading branch information
mrmer1 authored Jan 31, 2025
2 parents ea2501e + 8834d79 commit dcc1299
Show file tree
Hide file tree
Showing 20 changed files with 2,385 additions and 2,309 deletions.
72 changes: 41 additions & 31 deletions .github/scripts/check_python_code_snippets.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,45 +7,69 @@
BASE_DIR = Path(__file__).resolve().parent
MDX_DIR = BASE_DIR / "../../fern/pages"
FILE_PATTERN = re.compile(r"\.mdx$")
EXCLUDE_DIRS = ["cookbooks"] # Add directory names to exclude


def find_files_by_pattern(directory, pattern):
def find_files_by_pattern(directory, pattern, exclude_dirs=None):
"""
Finds all files in the given directory that match the provided regex pattern.
Skips directories listed in exclude_dirs.
"""
exclude_dirs = exclude_dirs or []
directory = Path(directory).resolve()

if not directory.is_dir():
raise ValueError(f"Provided directory {directory} is not valid.")
return [f for f in directory.rglob('*') if f.is_file() and pattern.search(f.name)]

matching_files = []
for root, dirs, files in os.walk(directory):
# Remove excluded directories from the walk
dirs[:] = [d for d in dirs if d not in exclude_dirs]

for file_name in files:
file_path = Path(root) / file_name
if pattern.search(file_name):
matching_files.append(file_path)

return matching_files


def format_python_snippets_in_mdx(file_path, line_length=DEFAULT_LINE_LENGTH):
"""
Formats Python code snippets inside MDX files using Black.
"""
black_mode = black.FileMode(line_length=line_length)
code_block_pattern = re.compile(r"```python\n(.*?)\n```", re.DOTALL)
code_block_pattern = re.compile(r"(`{3,4})(python|python PYTHON)\n(.*?)\n\1", re.DOTALL)

with open(file_path, 'r', encoding='utf-8') as file:
original_content = file.read()

def format_with_black(match):
code = match.group(1)
"""
Formats the matched Python code block using Black
"""
backtick_count = match.group(1) # Preserve the backtick count (``` or ````)
block_label = match.group(2) # Capture the label (python or python PYTHON)
code = match.group(3)

# Comment out lines starting with '!'
processed_code = re.sub(r"^\s*!(.*)", r"# TEMP_COMMENT !\1", code, flags=re.MULTILINE)
# Comment out lines starting with '!' or '%' for formatting
processed_code = re.sub(r"^\s*(!|%)(.*)", r"# TEMP_COMMENT_\1\2", code, flags=re.MULTILINE)

# Format the processed code using Black
try:
# Format the code with Black
formatted_code = black.format_str(processed_code, mode=black_mode)
except black.NothingChanged:
# If Black doesn't change anything, use original
formatted_code = processed_code
return match.group(0) # Return the original block if nothing changed
except black.parsing.InvalidInput as e:
print(f"Error formatting Python code in {file_path}: {e}")
# Optionally return original unformatted code or handle differently
return match.group(0)

# Revert the commented lines starting with '!'
reverted_code = re.sub(r"^\s*# TEMP_COMMENT !(.*)", r"!\1", formatted_code, flags=re.MULTILINE)
# Revert the temporary comments back to their original form
reverted_code = re.sub(r"^\s*# TEMP_COMMENT_(!|%)(.*)", r"\1\2", formatted_code, flags=re.MULTILINE)

return f"```python\n{reverted_code.strip()}\n```"
# Return the fully formatted and reverted block
return f"{backtick_count}{block_label}\n{reverted_code.strip()}\n{backtick_count}"

new_content = code_block_pattern.sub(format_with_black, original_content)

Expand All @@ -55,44 +79,30 @@ def format_with_black(match):
return original_content, new_content


def process_mdx_files(directory, file_pattern, line_length=DEFAULT_LINE_LENGTH, check_changes=False):
def process_mdx_files(directory, file_pattern, exclude_dirs=None, line_length=DEFAULT_LINE_LENGTH):
"""
Processes all MDX files in the directory, formatting Python code snippets.
Args:
directory (Path or str): Path to the directory containing MDX files.
file_pattern (re.Pattern): Regex pattern to match MDX files.
line_length (int): Line length to use for Black formatting.
check_changes (bool): If True, raises an exception if changes are detected.
"""
matching_files = find_files_by_pattern(directory, file_pattern)
files_changed = []
matching_files = find_files_by_pattern(directory, file_pattern, exclude_dirs)

for file_path in matching_files:
original_content, new_content = format_python_snippets_in_mdx(file_path, line_length)

if original_content != new_content:
files_changed.append(file_path)

if check_changes and files_changed:
raise RuntimeError(
f"The following files were modified during the run:\n"
+ "\n".join(str(file) for file in files_changed)
)
print(f"Formatted: {file_path}")


if __name__ == "__main__":
import sys

path = sys.argv[1] if len(sys.argv) > 1 else MDX_DIR
line_length = int(sys.argv[2]) if len(sys.argv) > 2 else DEFAULT_LINE_LENGTH
check_changes = os.getenv("CI") == "true" # Set to True in CI pipeline

if Path(path).is_dir():
process_mdx_files(path, FILE_PATTERN, line_length, check_changes)
process_mdx_files(path, FILE_PATTERN, EXCLUDE_DIRS, line_length)
elif Path(path).is_file():
if FILE_PATTERN.search(path):
process_mdx_files(Path(path).parent, FILE_PATTERN, line_length, check_changes)
process_mdx_files(Path(path).parent, FILE_PATTERN, EXCLUDE_DIRS, line_length)
else:
print("The specified file does not match the MDX pattern.")
else:
Expand Down
18 changes: 16 additions & 2 deletions .github/workflows/check-python-code-snippets.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ jobs:
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
ref: ${{ github.head_ref }}

- name: Set up Python
uses: actions/setup-python@v4
Expand All @@ -34,6 +36,18 @@ jobs:
- name: Run Python MDX Snippet Formatter
shell: bash
env:
CI: true
run: poetry run python .github/scripts/check_python_code_snippets.py fern/pages

- name: Check for changes
id: diff
run: |
git diff --exit-code || echo "::set-output name=changes::true"
- name: Commit and Push Changes
if: steps.diff.outputs.changes == 'true'
run: |
git config --local user.email "[email protected]"
git config --local user.name "GitHub Action"
git add -u
git commit -m "Format Python snippets in MDX files"
git push
4,305 changes: 2,155 additions & 2,150 deletions cohere-openapi.yaml

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ Next, explore the tools on the **Product Detail** page to evaluate how you want
- Subscribing: This section will once again present you with both the pricing details and the EULA for final review before you accept the offer. This information is identical to the information on the Product Detail page.
- Configuration: The primary goal of this section is to retrieve the [Amazon Resource Name (ARN)](https://docs.aws.amazon.com/IAM/latest/UserGuide/reference-arns.html) for the product you have subscribed to.

<Warning>For any Cohere _software_ version after 1.0.5 (or _model_ version after 3.0.5), the parameter `InferenceAmiVersion=al2-ami-sagemaker-inference-gpu-2` must be specified during endpoint configuration (as a variant option) to avoid deployment errors.</Warning>

## Embeddings

You can use this code to invoke Cohere's embed model on Amazon SageMaker:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ These permissions allow a user to manage your organization’s SageMaker subscri

First, navigate to [Cohere’s SageMaker Marketplace](https://aws.amazon.com/marketplace/seller-profile?id=87af0c85-6cf9-4ed8-bee0-b40ce65167e0) to view the product offerings available to you. Select the product offering to which you are interested in subscribing.

Next, explore the tools on the **Product Detail** page to evaluate how you want to configure your subscription. Some of the key sections to consider are detailed below:
Next, explore the tools on the **Product Detail** page to evaluate how you want to configure your subscription. Some of the key sections to consider are detailed below.

#### Pricing

Expand Down
12 changes: 12 additions & 0 deletions fern/pages/text-generation/tools/tool-use.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,18 @@ if not response.tool_calls:
co.chat(message=message, tools=tools, tool_results=[])
```

## Forcing Tool Use

During the tool calling step, the model may decide to either:
- make tool call(s)
- or, respond to a user message directly.

You can force the model to make tool call(s), i.e. to not respond directly, by setting `force_single_step=True` and providing some tool definitions through the `tools` parameter.

This is equivalent to setting `tool_choice` to `REQUIRED` in the v2 API.

You can also force the model to respond directly by setting `force_single_step=True` and providing some tool results through the `tool_results` parameter. This is equivalent to setting `tool_choice` to `NONE` in the v2 API.

## Single-Step Tool Use and Chat History

Single-step tool use functions as part of a two-part conversational process. Here’s how that works:
Expand Down
2 changes: 1 addition & 1 deletion fern/pages/tutorials/build-things-with-cohere.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ Next, we'll import the `cohere` library and create a client to be used throughou
import cohere

# Get your API key here: https://dashboard.cohere.com/api-keys
co = cohere.Client(api_key="YOUR_COHERE_API_KEY")
co = cohere.Client(api_key="YOUR_COHERE_API_KEY")
```

# Accessing Cohere from Other Platforms
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ To get started, first we need to install the `cohere` library and create a Coher
import cohere

# Get your API key: https://dashboard.cohere.com/api-keys
co = cohere.Client("COHERE_API_KEY")
co = cohere.Client("COHERE_API_KEY")
```

## Creating a custom preamble
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ import numpy as np
import cohere

# Get your API key: https://dashboard.cohere.com/api-keys
co = cohere.Client("COHERE_API_KEY")
co = cohere.Client("COHERE_API_KEY")
```

## Creating tools
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ import numpy as np
import cohere

# Get your API key: https://dashboard.cohere.com/api-keys
co = cohere.Client("COHERE_API_KEY")
co = cohere.Client("COHERE_API_KEY")
```

## Basic RAG
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ import numpy as np
import cohere

# Get your API key: https://dashboard.cohere.com/api-keys
co = cohere.Client("COHERE_API_KEY")
co = cohere.Client("COHERE_API_KEY")
```

## Reranking lexical/semantic search results
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ import numpy as np
import cohere

# Get your API key: https://dashboard.cohere.com/api-keys
co = cohere.Client("COHERE_API_KEY")
co = cohere.Client("COHERE_API_KEY")
```

## Embedding the documents
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ To get started, first we need to install the `cohere` library and create a Coher
import cohere

# Get your API key: https://dashboard.cohere.com/api-keys
co = cohere.Client("COHERE_API_KEY")
co = cohere.Client("COHERE_API_KEY")
```

## Basic text generation
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ Next, explore the tools on the **Product Detail** page to evaluate how you want
- Subscribing: This section will once again present you with both the pricing details and the EULA for final review before you accept the offer. This information is identical to the information on the Product Detail page.
- Configuration: The primary goal of this section is to retrieve the [Amazon Resource Name (ARN)](https://docs.aws.amazon.com/IAM/latest/UserGuide/reference-arns.html) for the product you have subscribed to.

<Warning>For any Cohere _software_ version after 1.0.5 (or _model_ version after 3.0.5), the parameter `InferenceAmiVersion=al2-ami-sagemaker-inference-gpu-2` must be specified during endpoint configuration (as a variant option) to avoid deployment errors.</Warning>

## Embeddings

You can use this code to invoke Cohere's embed model on Amazon SageMaker:
Expand Down
57 changes: 31 additions & 26 deletions fern/pages/v2/text-generation/structured-outputs.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -141,46 +141,51 @@ In this schema, we defined three keys ("title," "author," "publication_year") an
Here's an example of a nested array. Note that the top-level JSON structure must always be a JSON object.

```python PYTHON
cohere_api_key = os.getenv('cohere_api_key')
cohere_api_key = os.getenv("cohere_api_key")
co = cohere.ClientV2(cohere_api_key)
response = co.chat(
response_format={
"type": "json_object",
"schema": {
"type": "object",
"properties": {
"actions": {
"type": "array",
"items": {
"type": "object",
"properties": {
"japanese": {"type": "string"},
"romaji": {"type": "string"},
"english": {"type": "string"}
},
"required": ["japanese", "romaji", "english"]
"type": "json_object",
"schema": {
"type": "object",
"properties": {
"actions": {
"type": "array",
"items": {
"type": "object",
"properties": {
"japanese": {"type": "string"},
"romaji": {"type": "string"},
"english": {"type": "string"},
},
"required": ["japanese", "romaji", "english"],
},
}
},
}
"required": ["actions"],
},
"required": ["actions"]
}
},
model="command-r",
messages=[
{"role": "user", "content": "Generate a JSON array of objects with the following fields: japanese, romaji, english. These actions should be japanese verbs provided in the dictionary form.},
]
)
{
"role": "user",
"content": "Generate a JSON array of objects with the following fields: japanese, romaji, english. These actions should be japanese verbs provided in the dictionary form.",
},
],
)
return json.loads(response.message.content[0].text)
```

The output for this example would be:

```json
{"actions": [
{"japanese": "いこう", "romaji": "ikou", "english": "onward"},
{"japanese": "探す", "romaji": "sagasu", "english": "search"},
{"japanese": "話す", "romaji": "hanasu", "english": "talk"}
]
{
"actions": [
{"japanese": "いこう", "romaji": "ikou", "english": "onward"},
{"japanese": "探す", "romaji": "sagasu", "english": "search"},
{"japanese": "話す", "romaji": "hanasu", "english": "talk"}
]
}
```


Expand Down
36 changes: 31 additions & 5 deletions fern/pages/v2/text-generation/tools/tool-use.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -411,17 +411,43 @@ Start: 177 | End: 204 | Text: 'Laptop: $1,000, 15 in stock'
Start: 207 | End: 232 | Text: 'Tablet: $300, 25 in stock'
```

## Forcing Tool Use

During the tool calling step, the model may decide to either:
- make tool call(s)
- or, respond to a user message directly.

You can force the model to make tool call(s), i.e. to not respond directly, by setting the `tool_choice` parameter to `REQUIRED`.

Alternatively, you can force the model to respond directly, i.e. to not make tool call(s), by setting the `tool_choice` parameter to `NONE`.

By default, if you don’t specify the `tool_choice` parameter, then the model will decide whether it's more appropriate to call tools or to respond directly.

```python PYTHON {5}
response = co.chat(
model="command-r-plus-08-2024",
messages=messages,
tools=tools,
tool_choice="REQUIRED" # optional, to force tool calls
# tool_choice="NONE" # optional, to force a direct response
)
```

<Note>This parameter is only compatible with the [Command R7B](https://docs.cohere.com/v2/docs/command-r7b) and newer models.</Note>

## Structured Outputs (Tools)

Setting the `strict_tools` parameter to `True` will force each tool call to follow the specified tool schema. To learn more about this feature, visit the [Structured Outputs documentation](https://docs.cohere.com/v2/docs/structured-outputs).

Note that `strict_tools` is currently an experimental feature.

```python PYTHON {4}
response = co.chat(model="command-r-plus-08-2024",
messages=messages,
tools=tools,
strict_tools=True)
```python PYTHON {5}
response = co.chat(
model="command-r-plus-08-2024",
messages=messages,
tools=tools,
strict_tools=True
)
```

## How to Get Good Answers With Tool Use
Expand Down
Loading

0 comments on commit dcc1299

Please sign in to comment.