wandb · ngrayluna · Apr 22, 2024 · Apr 24, 2024 · Apr 24, 2024 · Apr 25, 2024
diff --git a/.github/scripts/post_process_notebook.py b/.github/scripts/post_process_notebook.py
@@ -0,0 +1,75 @@
+#!/usr/bin/env python
+
+import os
+import re
+import argparse
+
+
+def add_import_statement():
+    """Return the import line for the CTAButtons component, followed by a blank line."""
+    cta_import = "import { CTAButtons } from '@site/src/components/CTAButtons/CTAButtons.tsx'\n\n"
+    return cta_import
+
+def extract_href_links_from_markdown(markdown_text):
+    """Collect the href value of every `<a href="...">` tag in the text.
+
+    Only anchors whose first attribute is a double-quoted, non-empty href are
+    matched. Values are returned in document order; the list is empty when
+    nothing matches.
+    """
+    anchor_href = re.compile(r'<a\s+href="([^"]+)"')
+    return anchor_href.findall(markdown_text)
+
+def format_CTA_button(href_links):
+    """Build the `<CTAButtons>` tag for the first Colab link in `href_links`.
+
+    Args:
+        href_links: URLs in document order.
+
+    Returns:
+        The tag with `colab_button` set to the first URL containing "colab",
+        or an empty string when no URL contains "colab".
+    """
+    # Pick the Colab URL itself rather than whichever link happens to be first
+    # in the list: other links may precede the Colab one.
+    colab_link = next((link for link in href_links if "colab" in link), None)
+    if colab_link is None:
+        return ''
+    return "<CTAButtons colab_button='" + colab_link + "'/>"
+
+def remove_patterns_from_markdown(markdown_text):
+    """Strip notebook-only HTML from converted Markdown.
+
+    Removes, in order: `<img>` tags, `<div>...</div>` elements that open and
+    close on the same line, `<!--- @wandbcode{...} -->` comments, and
+    `<a ... href="...">` elements that contain nothing but whitespace.
+
+    Args:
+        markdown_text: Markdown text to clean.
+
+    Returns:
+        The text with every match replaced by an empty string.
+    """
+    patterns = (
+        # Images go first so that an anchor wrapping only an image is already
+        # empty by the time the anchor pattern runs.
+        r'<img[^>]+>',
+        r'<div\b[^>]*>.*?</div>',
+        r'<!---\s*@wandbcode\{.*?\}\s*-->',
+        # Only anchors with an empty body: matching arbitrary content here
+        # would also delete ordinary links together with their text.
+        r'<a\s+[^>]*\s*href\s*=\s*"[^"]*"\s*[^>]*>\s*</a>',
+    )
+
+    cleaned_text = markdown_text
+    for pattern in patterns:
+        cleaned_text = re.sub(pattern, '', cleaned_text)
+
+    return cleaned_text
+
+
+def main(args):
+    """Post-process every Markdown file listed in `args.colab_notebooks`, in place.
+
+    For each file: print its path, then rewrite it as the CTAButtons import
+    line, the markup returned by format_CTA_button(), and the cleaned
+    Markdown, in that order.
+
+    Args:
+        args: Parsed arguments with a `colab_notebooks` list of file paths.
+    """
+    for colab in args.colab_notebooks:
+        print(colab)
+        # Read the content of the input Markdown file
+        with open(colab, 'r', encoding='utf-8') as file:
+            markdown_text = file.read()
+
+        # Links are extracted from the untouched text, before the clean-up
+        # below gets a chance to remove the anchor that holds them.
+        href_links = extract_href_links_from_markdown(markdown_text)
+
+        # Create CTA button format
+        colab_button_markdown = format_CTA_button(href_links)
+
+        # Modify the Markdown content (e.g., remove <img> tags and specified comment)
+        cleaned_markdown = remove_patterns_from_markdown(markdown_text)
+
+        # Write the modified Markdown content back to the same file
+        with open(colab, 'w', encoding='utf-8') as file:
+            file.write(add_import_statement())
+            file.write(colab_button_markdown)
+            # TODO: file.write(add_title(title))
+            file.write(cleaned_markdown)
+        # Deliberately no `return` here: returning inside the loop would stop
+        # after the first file and leave the remaining ones unprocessed.
+
+if __name__ == "__main__":
+    # Command line: zero or more Markdown paths, passed to main() as `colab_notebooks`.
+    cli = argparse.ArgumentParser()
+    cli.add_argument("colab_notebooks", nargs="*", help="markdown file to process")
+    main(cli.parse_args())
diff --git a/.github/scripts/rename_notebook.py b/.github/scripts/rename_notebook.py
@@ -0,0 +1,53 @@
+#!/usr/bin/env python
+
+import os
+import argparse
+
+# no_longer = {
+#         "RayTune_with_wandb": "",
+#         "Weights_&_Biases_with_fastai": "",
+#         "WandB_Prompts_Quickstart":"",    
+# }
+
+# Maps a converted notebook's base file name (the part before the first ".")
+# to the base name of the Markdown file it is renamed to. The ".md" extension
+# is appended at rename time, so values are written without it.
+title_mapping = {
+    "Intro_to_Weights_&_Biases": "experiments",
+    "Pipeline_Versioning_with_W&B_Artifacts": "artifacts",
+    "Model_Registry_E2E": "models",
+    "W&B_Tables_Quickstart": "tables",
+    "Organizing_Hyperparameter_Sweeps_in_PyTorch_with_W&B": "sweeps",
+    "Using_W&B_Sweeps_with_XGBoost": "xgboost_sweeps",
+    "Simple_PyTorch_Integration": "pytorch",
+    "Huggingface_wandb": "huggingface",
+    "Hyperparameter_Optimization_in_TensorFlow_using_W&B_Sweeps": "tensorflow_sweeps",
+    "Image_Classification_using_PyTorch_Lightning": "lightning",
+    "Simple_TensorFlow_Integration": "tensorflow",
+    "Use_WandbMetricLogger_in_your_Keras_workflow": "keras",
+    "Use_WandbEvalCallback_in_your_Keras_workflow": "keras_table",
+    "Use_WandbModelCheckpoint_in_your_Keras_workflow": "keras_models",
+}
+
+def rename_markdown_file(filename, title_names):
+    """Rename a Markdown file to its mapped name, keeping it in the same directory.
+
+    Args:
+        filename: Path of the Markdown file to check.
+        title_names: Mapping from base file name (the text before the first
+            "." of the file name) to the new base name; ".md" is appended.
+
+    A file whose base name is not in the mapping is left untouched.
+    """
+    directory, file_part = os.path.split(filename)
+    base_name = file_part.split('.')[0]
+    if base_name not in title_names:
+        print(f"No title match found. {filename} reserved.")
+        return
+
+    # Keep the renamed file beside the original. A bare target name would be
+    # resolved against the current working directory and move the file there.
+    new_path = os.path.join(directory, title_names[base_name] + ".md")
+    print(f"Renaming notebook from {filename} to {new_path}")
+    os.rename(filename, new_path)
+
+
+def main(args):
+    """Check every Markdown file listed in `args.file` and rename those with a mapped title.
+
+    Args:
+        args: Parsed arguments with a `file` list of paths.
+    """
+    print(args.file)
+    for markdown_file in args.file:
+        rename_markdown_file(markdown_file, title_mapping)
+    # Deliberately no `return` inside the loop: it would stop after the first
+    # file and skip the rest of the list.
+
+if __name__ == "__main__":
+    # Command line: zero or more paths, passed to main() as `file`.
+    cli = argparse.ArgumentParser()
+    cli.add_argument("file", nargs="*", help="Notebook to check if it needs converting")
+    main(cli.parse_args())
diff --git a/.github/workflows/create_markdown.yaml b/.github/workflows/create_markdown.yaml
@@ -0,0 +1,79 @@
+# Converts notebooks touched by a pull request to Markdown, then post-processes
+# and renames the generated files with the scripts in .github/scripts.
+name: Convert Jupyter notebooks to markdown files
+on:
+  pull_request:
+    types: [opened]
+
+jobs:
+  convert_to_markdown:
+    name: Convert Jupyter Notebooks to Markdown
+    runs-on: ubuntu-latest
+    outputs:
+      generated_markdown_files: ${{ steps.convert_notebooks.outputs.generated_markdown_files }}
+
+    steps:
+    - name: Checkout repository
+      uses: actions/checkout@v4
+      with:
+        # The default depth of 1 has no parent commit, so `git diff HEAD^ HEAD` would fail
+        fetch-depth: 2
+
+    - name: Install Python
+      uses: actions/setup-python@v4
+      with:
+        python-version: '3.10'
+
+    - name: Install Python dependencies
+      # The requirements file lives next to this workflow, not at the repository root.
+      # nbconvert is named explicitly because the conversion step runs `jupyter nbconvert`.
+      run: pip install -r .github/workflows/requirements.txt nbconvert
+
+    - name: Find Modified Jupyter Notebooks
+      id: find_notebooks
+      run: |
+        # Get notebooks that were added, modified or renamed (no match is not an error)
+        NOTEBOOK_FILES=$(git diff --name-only --diff-filter=AMR HEAD^ HEAD | { grep -E "\.ipynb$" || true; } | tr '\n' ' ')
+        # Pass the list to the next step
+        echo "NOTEBOOK_FILES=$NOTEBOOK_FILES" >> "$GITHUB_ENV"
+
+    - name: Convert Jupyter Notebooks to Markdown
+      # `if` takes an expression; shell-style `$NOTEBOOK_FILES` is not expanded here
+      if: env.NOTEBOOK_FILES != ''
+      id: convert_notebooks
+      run: |
+        GENERATED_MARKDOWN_FILES=""
+        # Word splitting is intended: NOTEBOOK_FILES is a space-separated list
+        for notebook_file in $NOTEBOOK_FILES; do
+          # Test the command directly: the step shell exits on the first failing
+          # command, so a separate `$?` check afterwards would never run
+          if ! jupyter nbconvert --to markdown "$notebook_file"; then
+            echo "Error: Conversion of $notebook_file to Markdown failed."
+            exit 1
+          fi
+          # nbconvert writes <name>.md next to <name>.ipynb
+          GENERATED_MARKDOWN_FILES="$GENERATED_MARKDOWN_FILES${notebook_file%.ipynb}.md "
+        done
+        # Bundle the files so the next job, which starts on a fresh runner,
+        # can restore them at the same relative paths
+        tar -cf generated_markdown.tar $GENERATED_MARKDOWN_FILES
+        # Pass the list to the next job
+        echo "generated_markdown_files=$GENERATED_MARKDOWN_FILES" >> "$GITHUB_OUTPUT"
+
+    - name: Upload generated Markdown
+      if: steps.convert_notebooks.outputs.generated_markdown_files != ''
+      uses: actions/upload-artifact@v4
+      with:
+        name: generated-markdown
+        path: generated_markdown.tar
+
+
+  post_process_markdown:
+    name: Post-process Markdown Files
+    needs: convert_to_markdown
+    # Nothing to do when no notebook was converted
+    if: needs.convert_to_markdown.outputs.generated_markdown_files != ''
+    runs-on: ubuntu-latest
+    env:
+      GENERATED_MARKDOWN_FILES: ${{ needs.convert_to_markdown.outputs.generated_markdown_files }}
+
+    steps:
+    - name: Checkout code
+      uses: actions/checkout@v4
+
+    - name: Install Python
+      uses: actions/setup-python@v4
+      with:
+        python-version: '3.10'
+
+    - name: Download generated Markdown
+      uses: actions/download-artifact@v4
+      with:
+        name: generated-markdown
+
+    - name: Restore generated Markdown
+      run: tar -xf generated_markdown.tar
+
+    - name: Post-process Markdown Files
+      id: post_process_markdown
+      run: |
+        # The script prints the path of each file it processes
+        PROCESSED_MARKDOWN=$(python .github/scripts/post_process_notebook.py $GENERATED_MARKDOWN_FILES)
+        # Join the printed paths on one line: a plain NAME=value entry in
+        # GITHUB_ENV cannot span several lines
+        echo "PROCESSED_MARKDOWN=$(echo "$PROCESSED_MARKDOWN" | tr '\n' ' ')" >> "$GITHUB_ENV"
+
+    - name: Rename markdown Files
+      if: env.PROCESSED_MARKDOWN != ''
+      run: python .github/scripts/rename_notebook.py $PROCESSED_MARKDOWN
diff --git a/.github/workflows/requirements.txt b/.github/workflows/requirements.txt
@@ -0,0 +1 @@
+nbdev