-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsetup_generic_crawler.sh
More file actions
executable file
·67 lines (56 loc) · 2.04 KB
/
setup_generic_crawler.sh
File metadata and controls
executable file
·67 lines (56 loc) · 2.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#!/bin/bash
# Setup script for the Generic Documentation Crawler
echo "Setting up Generic Documentation Crawler..."

# Locate a Python interpreter on PATH; python3 is tried before python.
# The chosen command name is stored in PYTHON for the rest of the script.
PYTHON=""
for py_candidate in python3 python; do
  if command -v "$py_candidate" >/dev/null 2>&1; then
    PYTHON="$py_candidate"
    break
  fi
done
unset py_candidate

# Neither name resolved, so nothing further can run.
if [ -z "$PYTHON" ]; then
  echo "Error: Python not found. Please install Python 3.11 or later."
  exit 1
fi
# Check Python version (3.11 or later is required).
#
# The probe deliberately uses %-formatting instead of an f-string: f-strings
# are a syntax error on Python 2 and on Python < 3.6, which would leave
# PY_VERSION empty and let the numeric comparison below fail open on exactly
# the interpreters this check is supposed to reject.
PY_VERSION=$("$PYTHON" -c "import sys; print('%d.%d' % sys.version_info[:2])")

# Refuse to continue unless the probe produced a well-formed MAJOR.MINOR.
if [[ ! "$PY_VERSION" =~ ^[0-9]+\.[0-9]+$ ]]; then
  echo "Error: Could not determine the Python version using '$PYTHON'." >&2
  exit 1
fi

PY_VERSION_MAJOR=${PY_VERSION%%.*}
PY_VERSION_MINOR=${PY_VERSION#*.}

# { ...; } groups the second clause without spawning a subshell.
if [ "$PY_VERSION_MAJOR" -lt 3 ] ||
  { [ "$PY_VERSION_MAJOR" -eq 3 ] && [ "$PY_VERSION_MINOR" -lt 11 ]; }; then
  echo "Error: Python 3.11 or later is required. Current version: $PY_VERSION" >&2
  exit 1
fi
echo "Python version $PY_VERSION detected."
# Create a virtual environment (optional but recommended)
echo "Creating a virtual environment..."
# Stop if creation fails: otherwise a ./venv directory left over from an
# earlier run would be activated below and the failure would go unnoticed.
if ! "$PYTHON" -m venv venv; then
  echo "Error: Could not create virtual environment." >&2
  exit 1
fi

# Activate the virtual environment.
# The activate script is looked up in venv/bin first (Unix/macOS layout) and
# then in venv/Scripts (Windows layout).
if [ -f "venv/bin/activate" ]; then
  VENV_ACTIVATE="venv/bin/activate"
elif [ -f "venv/Scripts/activate" ]; then
  VENV_ACTIVATE="venv/Scripts/activate"
else
  echo "Error: Could not activate virtual environment." >&2
  exit 1
fi

# shellcheck disable=SC1090  # path is chosen at runtime
if ! source "$VENV_ACTIVATE"; then
  echo "Error: Could not activate virtual environment." >&2
  exit 1
fi
echo "Virtual environment activated."
# Install Python dependencies.
# Each install step is checked so the script cannot go on to report
# "Setup complete!" after a failed installation.
echo "Installing Python dependencies..."
if ! pip install -r requirements.txt; then
  echo "Error: Failed to install Python dependencies from requirements.txt." >&2
  exit 1
fi

# Install Playwright browsers
echo "Installing Playwright browsers..."
if ! playwright install; then
  echo "Error: Failed to install Playwright browsers." >&2
  exit 1
fi
#######################################
# Create a starter .env file in the current directory unless one exists.
# An existing .env is left untouched.
# Outputs:  progress messages on stdout, an error message on stderr
# Returns:  0 if .env already exists or was created,
#           1 if it could not be written
#######################################
create_env_file() {
  if [ -f ".env" ]; then
    return 0
  fi

  echo "Creating .env file..."
  # The file is meant to hold an API key, so it is created with a restrictive
  # umask (owner read/write only). The umask change is confined to a subshell
  # so it does not affect anything else the script creates.
  if ! (
    umask 077
    printf '%s\n' "OPENAI_API_KEY=" "LLM_MODEL=gpt-4o-mini" > .env
  ); then
    echo "Error: Could not write .env file." >&2
    return 1
  fi
  echo ".env file created. Please edit it to add your OpenAI API key."
}

# Create .env file if it doesn't exist
create_env_file || exit 1
# Closing summary: list the remaining manual steps and show how to start the
# crawler. One printf call emits every line, each followed by a newline;
# empty arguments produce the blank separator lines.
printf '%s\n' \
  "" \
  "Setup complete! Before running the crawler, make sure to:" \
  "1. Add your OpenAI API key to the .env file" \
  "2. Activate the virtual environment with: source venv/bin/activate (on Unix/macOS) or venv\\Scripts\\activate (on Windows)" \
  "" \
  "Then run the crawler with:" \
  "python generic_docs_crawler.py https://your-documentation-site.com" \
  ""