diff --git a/README.md b/README.md index 5c46f48..125ea85 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# nlm - NotebookLM CLI Tool 📚 +# nlm - NotebookLM CLI Tool 📚 `nlm` is a command-line interface for Google's NotebookLM, allowing you to manage notebooks, sources, and audio overviews from your terminal. diff --git a/docs/WSL.md b/docs/WSL.md new file mode 100644 index 0000000..b3f8cf8 --- /dev/null +++ b/docs/WSL.md @@ -0,0 +1,146 @@ +# WSL Setup Guide for nlm + +This guide explains how to set up `nlm` (NotebookLM CLI) on Windows Subsystem for Linux (WSL). + +## The Problem + +Chrome/Chromium on Linux encrypts cookies using the system keyring. When `nlm` copies the browser profile to a temporary directory, the encryption keys are lost and authentication fails with "redirected to authentication page - not logged in". + +## Solution + +Use the `NLM_USE_ORIGINAL_PROFILE=1` environment variable to make `nlm` use the original profile directory instead of copying it. + +## Prerequisites + +- WSL2 installed on Windows +- Go 1.21+ installed in WSL + +## Installation Steps + +### 1. Install Chromium in WSL + +```bash +# Ubuntu/Debian +sudo apt update +sudo apt install chromium-browser + +# Or via snap (if apt version is not available) +sudo snap install chromium +``` + +### 2. Create Profile Symlink + +`nlm` looks for Chrome profiles in `~/.config/google-chrome`. Since we're using Chromium, create a symlink: + +```bash +# For apt-installed Chromium +ln -sf ~/.config/chromium ~/.config/google-chrome + +# For snap-installed Chromium +ln -sf ~/snap/chromium/common/chromium ~/.config/google-chrome +``` + +### 3. Initial Browser Setup + +Launch Chromium with basic password storage (to avoid keyring prompts): + +```bash +chromium --password-store=basic +``` + +Then: +1. Navigate to https://notebooklm.google.com +2. Sign in with your Google account +3. Close the browser + +### 4. Install nlm + +```bash +go install github.com/tmc/nlm/cmd/nlm@latest +``` + +### 5. 
Authenticate nlm + +Run the authentication with the original profile flag: + +```bash +NLM_USE_ORIGINAL_PROFILE=1 ~/go/bin/nlm auth -debug +``` + +You should see: +``` +Using original profile directory: /home/username/.config/google-chrome +... +Authentication successful! +``` + +### 6. Verify Installation + +```bash +# List your notebooks +~/go/bin/nlm list + +# List sources in a notebook +~/go/bin/nlm sources <notebook-id> +``` + +## Usage + +Always use `NLM_USE_ORIGINAL_PROFILE=1` when running `nlm` commands in WSL: + +```bash +export NLM_USE_ORIGINAL_PROFILE=1 +nlm list +nlm generate-chat <notebook-id> "Your question here" +``` + +Or add to your `.bashrc`: + +```bash +echo 'export NLM_USE_ORIGINAL_PROFILE=1' >> ~/.bashrc +source ~/.bashrc +``` + +## Troubleshooting + +### "no valid profiles found" + +Make sure the symlink exists: +```bash +ls -la ~/.config/google-chrome +``` + +Should point to your Chromium profile directory. + +### "redirected to authentication page" + +1. Make sure you're using `NLM_USE_ORIGINAL_PROFILE=1` +2. Re-login to NotebookLM in Chromium manually +3. Run `nlm auth -debug` again + +### Chrome process conflicts + +If authentication fails, close any running Chromium processes: +```bash +pkill -f chromium +``` + +Then try again. + +## How It Works + +The `NLM_USE_ORIGINAL_PROFILE` environment variable controls how `nlm` handles the browser profile: + +1. **Without flag (default)**: Copy browser profile to a temp directory, losing encryption keys +2. **With flag=1**: Use the original profile directory directly, preserving cookie encryption + +This is implemented in `internal/auth/auth.go` in both `tryMultipleProfiles()` and `GetAuth()` functions. + +## Security Note + +When using `NLM_USE_ORIGINAL_PROFILE=1`, `nlm` has access to your actual browser profile. 
This is necessary for authentication but means: + +- Close other Chromium windows before running `nlm` to avoid profile lock conflicts +- The browser automation has access to your real cookies and session data + +This is the same level of access as your regular browser session. diff --git a/gen/method/LabsTailwindOrchestrationService_ActOnSources_encoder.go b/gen/method/LabsTailwindOrchestrationService_ActOnSources_encoder.go index 2e6ca5a..424deec 100644 --- a/gen/method/LabsTailwindOrchestrationService_ActOnSources_encoder.go +++ b/gen/method/LabsTailwindOrchestrationService_ActOnSources_encoder.go @@ -2,21 +2,44 @@ package method import ( notebooklmv1alpha1 "github.com/tmc/nlm/gen/notebooklm/v1alpha1" - "github.com/tmc/nlm/internal/rpc/argbuilder" ) // GENERATION_BEHAVIOR: append // EncodeActOnSourcesArgs encodes arguments for LabsTailwindOrchestrationService.ActOnSources // RPC ID: yyryJe -// Argument format: [%project_id%, %action%, %source_ids%] +// Updated format based on actual browser API calls (November 2025) +// Format: [[[[source_ids]]],null,null,null,null,[action,[[context]],extra],null,[2,null,[1]]] func EncodeActOnSourcesArgs(req *notebooklmv1alpha1.ActOnSourcesRequest) []interface{} { - // Using generalized argument encoder - args, err := argbuilder.EncodeRPCArgs(req, "[%project_id%, %action%, %source_ids%]") - if err != nil { - // Log error and return empty args as fallback - // In production, this should be handled better - return []interface{}{} + // Build nested source IDs array + // Browser format in f.req args: [[[source_id]]] (3 levels in position 0) + // Because the whole args array adds 1 level, position 0 needs 3 levels + var sourceIDsInner []interface{} + for _, sid := range req.GetSourceIds() { + sourceIDsInner = append(sourceIDsInner, sid) } + // sourceIDsInner = [sid] - 1 level + // Wrap 2 more times: [[[sid]]] + sourceIDsNested := []interface{}{[]interface{}{sourceIDsInner}} + + // Build action info: [action, [["[CONTEXT]", ""]], ""] 
+ actionInfo := []interface{}{ + req.GetAction(), + []interface{}{[]interface{}{"[CONTEXT]", ""}}, + "", + } + + // Build the full argument array + args := []interface{}{ + sourceIDsNested, // Position 0: [[[[source_ids]]]] + nil, // Position 1 + nil, // Position 2 + nil, // Position 3 + nil, // Position 4 + actionInfo, // Position 5: [action, [[context]], extra] + nil, // Position 6 + []interface{}{2, nil, []interface{}{1}}, // Position 7: metadata + } + return args } diff --git a/gen/method/LabsTailwindOrchestrationService_GenerateFreeFormStreamed_encoder.go b/gen/method/LabsTailwindOrchestrationService_GenerateFreeFormStreamed_encoder.go index ff295a7..c27c070 100644 --- a/gen/method/LabsTailwindOrchestrationService_GenerateFreeFormStreamed_encoder.go +++ b/gen/method/LabsTailwindOrchestrationService_GenerateFreeFormStreamed_encoder.go @@ -2,39 +2,35 @@ package method import ( notebooklmv1alpha1 "github.com/tmc/nlm/gen/notebooklm/v1alpha1" - "github.com/tmc/nlm/internal/rpc/argbuilder" ) // GENERATION_BEHAVIOR: append // EncodeGenerateFreeFormStreamedArgs encodes arguments for LabsTailwindOrchestrationService.GenerateFreeFormStreamed // RPC ID: BD -// Argument format: [[%all_sources%], %prompt%, null, [2]] when sources present -// Fallback format: [%project_id%, %prompt%] when no sources +// Updated format based on browser API calls pattern (November 2025) +// Format similar to ActOnSources: [[[source_ids]],prompt,null,null,null,null,null,[2,null,[1]]] func EncodeGenerateFreeFormStreamedArgs(req *notebooklmv1alpha1.GenerateFreeFormStreamedRequest) []interface{} { - // If sources are provided, use the gRPC format with sources - if len(req.SourceIds) > 0 { - // Build source array - sourceArray := make([]interface{}, len(req.SourceIds)) - for i, sourceId := range req.SourceIds { - sourceArray[i] = []interface{}{sourceId} - } - - // Use gRPC format: [[%all_sources%], %prompt%, null, [2]] - return []interface{}{ - []interface{}{sourceArray}, - req.Prompt, - nil, - 
[]interface{}{2}, - } + // Build source IDs array with 2 levels of nesting + // Final format in f.req: [[[source_id]]] (3 levels after JSON serialization adds 1) + var sourceIDsInner []interface{} + for _, sid := range req.GetSourceIds() { + sourceIDsInner = append(sourceIDsInner, sid) } + // Wrap 1 time: [[sid]] + sourceIDsNested := []interface{}{sourceIDsInner} - // Fallback to old format without sources - args, err := argbuilder.EncodeRPCArgs(req, "[%project_id%, %prompt%]") - if err != nil { - // Log error and return empty args as fallback - // In production, this should be handled better - return []interface{}{} + // Build the full argument array + args := []interface{}{ + sourceIDsNested, // Position 0: [[source_ids]] + req.GetPrompt(), // Position 1: prompt text + nil, // Position 2 + nil, // Position 3 + nil, // Position 4 + nil, // Position 5 + nil, // Position 6 + []interface{}{2, nil, []interface{}{1}}, // Position 7: metadata } + return args } diff --git a/gen/service/LabsTailwindOrchestrationService_client.go b/gen/service/LabsTailwindOrchestrationService_client.go index 6bf5d74..a9a56ed 100644 --- a/gen/service/LabsTailwindOrchestrationService_client.go +++ b/gen/service/LabsTailwindOrchestrationService_client.go @@ -6,6 +6,7 @@ package service import ( "context" + "encoding/json" "fmt" "github.com/tmc/nlm/gen/method" @@ -13,6 +14,7 @@ import ( "github.com/tmc/nlm/internal/batchexecute" "github.com/tmc/nlm/internal/beprotojson" "github.com/tmc/nlm/internal/rpc" + "github.com/tmc/nlm/internal/rpc/grpcendpoint" "google.golang.org/protobuf/types/known/emptypb" ) @@ -170,8 +172,9 @@ func (c *LabsTailwindOrchestrationServiceClient) ListArtifacts(ctx context.Conte func (c *LabsTailwindOrchestrationServiceClient) ActOnSources(ctx context.Context, req *notebooklmv1alpha1.ActOnSourcesRequest) (*emptypb.Empty, error) { // Build the RPC call call := rpc.Call{ - ID: "yyryJe", - Args: method.EncodeActOnSourcesArgs(req), + ID: "yyryJe", + Args: 
method.EncodeActOnSourcesArgs(req), + NotebookID: req.GetProjectId(), } // Execute the RPC @@ -701,26 +704,52 @@ func (c *LabsTailwindOrchestrationServiceClient) GenerateDocumentGuides(ctx cont } // GenerateFreeFormStreamed calls the GenerateFreeFormStreamed RPC method. +// Uses the gRPC-style endpoint instead of batchexecute for chat functionality. func (c *LabsTailwindOrchestrationServiceClient) GenerateFreeFormStreamed(ctx context.Context, req *notebooklmv1alpha1.GenerateFreeFormStreamedRequest) (*notebooklmv1alpha1.GenerateFreeFormStreamedResponse, error) { - // Build the RPC call - call := rpc.Call{ - ID: "BD", - Args: method.EncodeGenerateFreeFormStreamedArgs(req), + // Create gRPC endpoint client using the same auth credentials + grpcClient := grpcendpoint.NewClient(c.rpcClient.Config.AuthToken, c.rpcClient.Config.Cookies) + + // Build the request body using the browser-compatible format + requestBody := grpcendpoint.BuildChatRequest(req.GetSourceIds(), req.GetPrompt()) + + // Create the gRPC request + grpcReq := grpcendpoint.Request{ + Endpoint: "/google.internal.labs.tailwind.orchestration.v1.LabsTailwindOrchestrationService/GenerateFreeFormStreamed", + Body: requestBody, } - // Execute the RPC - resp, err := c.rpcClient.Do(call) + // Execute the gRPC request + resp, err := grpcClient.Execute(grpcReq) if err != nil { return nil, fmt.Errorf("GenerateFreeFormStreamed: %w", err) } - // Decode the response - var result notebooklmv1alpha1.GenerateFreeFormStreamedResponse - if err := beprotojson.Unmarshal(resp, &result); err != nil { - return nil, fmt.Errorf("GenerateFreeFormStreamed: unmarshal response: %w", err) + // Parse the response manually - format is: [["text", null, [...], ...]] + // The first element of the inner array is the text response + var outerArray []interface{} + if err := json.Unmarshal(resp, &outerArray); err != nil { + return nil, fmt.Errorf("GenerateFreeFormStreamed: parse response: %w", err) } - return &result, nil + if len(outerArray) == 
0 { + return nil, fmt.Errorf("GenerateFreeFormStreamed: empty response") + } + + // Extract the text from the first element + innerArray, ok := outerArray[0].([]interface{}) + if !ok || len(innerArray) == 0 { + return nil, fmt.Errorf("GenerateFreeFormStreamed: invalid response format") + } + + text, ok := innerArray[0].(string) + if !ok { + return nil, fmt.Errorf("GenerateFreeFormStreamed: text not found in response") + } + + return ¬ebooklmv1alpha1.GenerateFreeFormStreamedResponse{ + Chunk: text, + IsFinal: true, + }, nil } // GenerateNotebookGuide calls the GenerateNotebookGuide RPC method. diff --git a/internal/api/client.go b/internal/api/client.go index a8ce765..dc2229e 100644 --- a/internal/api/client.go +++ b/internal/api/client.go @@ -13,6 +13,7 @@ import ( "net/url" "os" "path/filepath" + "strconv" "strings" "time" @@ -343,29 +344,46 @@ func (c *Client) AddSourceFromText(projectID string, content, title string) (str } func (c *Client) AddSourceFromBase64(projectID string, content, filename, contentType string) (string, error) { - resp, err := c.rpc.Do(rpc.Call{ - ID: rpc.RPCAddSources, + // Decode base64 content to get raw bytes + rawContent, err := base64.StdEncoding.DecodeString(content) + if err != nil { + return "", fmt.Errorf("decode base64 content: %w", err) + } + + // Step 1: Register the file with RPC o4cbdc to get SOURCE_ID + registerResp, err := c.rpc.Do(rpc.Call{ + ID: rpc.RPCRegisterBinarySource, NotebookID: projectID, Args: []interface{}{ []interface{}{ - []interface{}{ - content, - filename, - contentType, - "base64", - }, + []interface{}{filename}, }, projectID, + []interface{}{2}, + []interface{}{1, nil, nil, nil, nil, nil, nil, nil, nil, nil, []interface{}{1}}, }, }) if err != nil { - return "", fmt.Errorf("add binary source: %w", err) + return "", fmt.Errorf("register binary source: %w", err) } - sourceID, err := extractSourceID(resp) + // Extract SOURCE_ID from response + sourceID, err := extractSourceIDFromRegisterResponse(registerResp) 
// extractSourceIDFromRegisterResponse extracts the SOURCE_ID from an o4cbdc
// (register binary source) response.
//
// Documented response shape:
//
//	[[[[SOURCE_ID],filename,[null,null,null,null,0]]],null,[...]]
//
// The function follows the first element four levels down (data[0][0][0][0]).
// The value found there is accepted either as the ID string itself or as an
// array whose first element is the ID string. Any other shape is an error.
func extractSourceIDFromRegisterResponse(resp json.RawMessage) (string, error) {
	var data []interface{}
	if err := json.Unmarshal(resp, &data); err != nil {
		return "", fmt.Errorf("parse response JSON: %w", err)
	}

	notFound := func() error {
		return fmt.Errorf("could not find source ID in register response: %v", data)
	}

	// Every level on the way down must be a non-empty array.
	var node interface{} = data
	for depth := 0; depth < 4; depth++ {
		arr, ok := node.([]interface{})
		if !ok || len(arr) == 0 {
			return "", notFound()
		}
		node = arr[0]
	}

	switch v := node.(type) {
	case string:
		return v, nil
	case []interface{}:
		if len(v) > 0 {
			if id, ok := v[0].(string); ok {
				return id, nil
			}
		}
	}
	return "", notFound()
}
:= "https://notebooklm.google.com/upload/_/?authuser=0" + + // Prepare payload + payload := fmt.Sprintf(`{"PROJECT_ID":"%s","SOURCE_NAME":"%s","SOURCE_ID":"%s"}`, projectID, filename, sourceID) + + req, err := http.NewRequest("POST", initURL, strings.NewReader(payload)) + if err != nil { + return "", fmt.Errorf("create init request: %w", err) + } + + // Set headers + req.Header.Set("Content-Type", "application/x-www-form-urlencoded;charset=UTF-8") + req.Header.Set("x-goog-upload-command", "start") + req.Header.Set("x-goog-upload-header-content-length", strconv.Itoa(contentLength)) + req.Header.Set("x-goog-upload-protocol", "resumable") + req.Header.Set("Cookie", c.rpc.Config.Cookies) + + // Execute request + client := &http.Client{} + resp, err := client.Do(req) + if err != nil { + return "", fmt.Errorf("execute init request: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(resp.Body) + return "", fmt.Errorf("init upload failed with status %d: %s", resp.StatusCode, string(body)) + } + + // Extract upload URL from response header + uploadURL := resp.Header.Get("x-goog-upload-url") + if uploadURL == "" { + return "", fmt.Errorf("no upload URL in response headers") + } + + return uploadURL, nil +} + +// uploadFileContent uploads the file content to the resumable upload URL +func (c *Client) uploadFileContent(uploadURL string, content []byte) error { + req, err := http.NewRequest("POST", uploadURL, bytes.NewReader(content)) + if err != nil { + return fmt.Errorf("create upload request: %w", err) + } + + // Set headers + req.Header.Set("Content-Type", "application/x-www-form-urlencoded;charset=utf-8") + req.Header.Set("x-goog-upload-command", "upload, finalize") + req.Header.Set("x-goog-upload-offset", "0") + req.Header.Set("Content-Length", strconv.Itoa(len(content))) + req.Header.Set("Cookie", c.rpc.Config.Cookies) + + // Execute request + client := &http.Client{} + resp, err := client.Do(req) + if err != nil { + 
return fmt.Errorf("execute upload request: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(resp.Body) + return fmt.Errorf("upload failed with status %d: %s", resp.StatusCode, string(body)) + } + + // Check upload status + uploadStatus := resp.Header.Get("x-goog-upload-status") + if uploadStatus != "final" { + return fmt.Errorf("upload not finalized, status: %s", uploadStatus) + } + + return nil +} + // Note operations func (c *Client) CreateNote(projectID string, title string, initialContent string) (*Note, error) { diff --git a/internal/auth/auth.go b/internal/auth/auth.go index 2c15a2b..32d0b5e 100644 --- a/internal/auth/auth.go +++ b/internal/auth/auth.go @@ -615,16 +615,32 @@ func (ba *BrowserAuth) GetAuth(opts ...Option) (token, cookies string, err error return "", "", fmt.Errorf("no valid profiles found") } - // Create a temporary directory and copy profile data to preserve encryption keys - tempDir, err := os.MkdirTemp("", "nlm-chrome-*") - if err != nil { - return "", "", fmt.Errorf("create temp dir: %w", err) - } - ba.tempDir = tempDir + // Determine the user data directory to use + // When NLM_USE_ORIGINAL_PROFILE=1, use the original profile directory directly. + // This is useful for environments like WSL where cookie encryption keys are tied + // to the original profile location and copying them breaks authentication. 
+ var userDataDir string + useOriginalProfile := os.Getenv("NLM_USE_ORIGINAL_PROFILE") == "1" + + if useOriginalProfile { + // Use the parent directory of the profile path (e.g., ~/.config/google-chrome) + userDataDir = filepath.Dir(selectedProfile.Path) + if ba.debug { + fmt.Printf("Using original profile directory: %s\n", userDataDir) + } + } else { + // Default behavior: create a temporary directory and copy profile data + tempDir, err := os.MkdirTemp("", "nlm-chrome-*") + if err != nil { + return "", "", fmt.Errorf("create temp dir: %w", err) + } + ba.tempDir = tempDir + userDataDir = tempDir - // Copy the profile data - if err := ba.copyProfileDataFromPath(selectedProfile.Path); err != nil { - return "", "", fmt.Errorf("copy profile: %w", err) + // Copy the profile data + if err := ba.copyProfileDataFromPath(selectedProfile.Path); err != nil { + return "", "", fmt.Errorf("copy profile: %w", err) + } } var ctx context.Context @@ -634,7 +650,7 @@ func (ba *BrowserAuth) GetAuth(opts ...Option) (token, cookies string, err error chromeOpts := []chromedp.ExecAllocatorOption{ chromedp.NoFirstRun, chromedp.NoDefaultBrowserCheck, - chromedp.UserDataDir(ba.tempDir), + chromedp.UserDataDir(userDataDir), chromedp.Flag("headless", !ba.debug), chromedp.Flag("window-size", "1280,800"), chromedp.Flag("new-window", true), @@ -646,6 +662,14 @@ func (ba *BrowserAuth) GetAuth(opts ...Option) (token, cookies string, err error chromedp.ExecPath(getBrowserPathForProfile(selectedProfile.Browser)), } + // If using original profile, add the specific profile directory flag + if useOriginalProfile { + profileName := filepath.Base(selectedProfile.Path) + if profileName != "Default" { + chromeOpts = append(chromeOpts, chromedp.Flag("profile-directory", profileName)) + } + } + allocCtx, allocCancel := chromedp.NewExecAllocator(context.Background(), chromeOpts...) 
ba.cancel = allocCancel ctx, cancel = chromedp.NewContext(allocCtx) diff --git a/internal/auth/chrome_linux.go b/internal/auth/chrome_linux.go index c14e50e..a781449 100644 --- a/internal/auth/chrome_linux.go +++ b/internal/auth/chrome_linux.go @@ -57,3 +57,35 @@ func getChromePath() string { } return "" } + +// getBrowserPathForProfile returns the appropriate browser executable for a given browser type +func getBrowserPathForProfile(browserName string) string { + switch browserName { + case "Brave": + // Try Brave paths + bravePaths := []string{"brave-browser", "brave"} + for _, name := range bravePaths { + if path, err := exec.LookPath(name); err == nil { + return path + } + } + case "Chrome Canary": + // Chrome Canary is typically not available on Linux + // Fall back to regular Chrome + return getChromePath() + } + + // Fallback to any Chrome-based browser + return getChromePath() +} + +func getCanaryProfilePath() string { + // Chrome Canary is not typically available on Linux + // Return an empty string or fall back to regular Chrome profile path + return getProfilePath() +} + +func getBraveProfilePath() string { + home, _ := os.UserHomeDir() + return filepath.Join(home, ".config", "BraveSoftware", "Brave-Browser") +} diff --git a/internal/batchexecute/batchexecute.go b/internal/batchexecute/batchexecute.go index ff2011e..f8c3609 100644 --- a/internal/batchexecute/batchexecute.go +++ b/internal/batchexecute/batchexecute.go @@ -18,6 +18,67 @@ import ( // ErrUnauthorized represent an unauthorized request. var ErrUnauthorized = errors.New("unauthorized") +// sanitizeJSON fixes invalid escape sequences in JSON strings. 
// sanitizeJSON fixes invalid escape sequences in JSON strings.
//
// Inside string literals, a backslash that does not start a valid JSON escape
// (\" \\ \/ \b \f \n \r \t or \u followed by four hex digits) is doubled so
// that it decodes as a literal backslash. Text outside string literals is
// copied through unchanged.
//
// Valid escapes are consumed as a unit, so an escaped quote never reaches the
// delimiter check and every quote seen by the main loop toggles the string
// state. Looking only at the preceding byte would misread the closing quote
// of a string that ends in an escaped backslash ("a\\") as an escaped quote.
func sanitizeJSON(input string) string {
	var out strings.Builder
	out.Grow(len(input))

	inString := false
	for i := 0; i < len(input); {
		c := input[i]
		switch {
		case c == '"':
			inString = !inString
			out.WriteByte(c)
			i++
		case inString && c == '\\' && i+1 < len(input):
			if n := jsonEscapeLen(input[i:]); n > 0 {
				// Valid escape: keep as is.
				out.WriteString(input[i : i+n])
				i += n
			} else {
				// Invalid escape: escape the backslash itself and let the
				// following byte be handled as ordinary text.
				out.WriteString(`\\`)
				i++
			}
		default:
			out.WriteByte(c)
			i++
		}
	}

	return out.String()
}

// jsonEscapeLen reports the byte length of the valid JSON escape sequence at
// the start of s, or 0 if there is none. s must begin with a backslash and be
// at least two bytes long.
func jsonEscapeLen(s string) int {
	switch s[1] {
	case '"', '\\', '/', 'b', 'f', 'n', 'r', 't':
		return 2
	case 'u':
		if len(s) < 6 {
			return 0
		}
		for _, h := range []byte(s[2:6]) {
			isHex := (h >= '0' && h <= '9') || (h >= 'a' && h <= 'f') || (h >= 'A' && h <= 'F')
			if !isHex {
				return 0
			}
		}
		return 6
	}
	return 0
}
chunk := range chunks { - // Try to fix any common escaping issues before parsing - chunk = strings.ReplaceAll(chunk, "\\\"", "\"") + // Sanitize JSON to fix invalid escape sequences in server responses + chunk = sanitizeJSON(chunk) // Remove any outer quotes if present trimmed := strings.TrimSpace(chunk) diff --git a/internal/rpc/grpcendpoint/handler.go b/internal/rpc/grpcendpoint/handler.go index 8e69434..bc09191 100644 --- a/internal/rpc/grpcendpoint/handler.go +++ b/internal/rpc/grpcendpoint/handler.go @@ -8,6 +8,8 @@ import ( "net/http" "net/url" "strings" + + "github.com/tmc/nlm/internal/rpc" ) // Client handles gRPC-style endpoint requests @@ -40,10 +42,13 @@ func (c *Client) Execute(req Request) ([]byte, error) { // Build the full URL with the endpoint fullURL := baseURL + req.Endpoint + // Get API parameters dynamically + apiParams := rpc.GetAPIParams(c.cookies) + // Add query parameters params := url.Values{} - params.Set("bl", "boq_labs-tailwind-frontend_20250903.07_p0") - params.Set("f.sid", "-2216531235646590877") // This may need to be dynamic + params.Set("bl", apiParams.BuildVersion) + params.Set("f.sid", apiParams.SessionID) params.Set("hl", "en") params.Set("_reqid", fmt.Sprintf("%d", generateRequestID())) params.Set("rt", "c") @@ -77,9 +82,10 @@ func (c *Client) Execute(req Request) ([]byte, error) { httpReq.Header.Set("Accept-Language", "en-US,en;q=0.9") if c.debug { - fmt.Printf("=== gRPC Request ===\n") + fmt.Printf("=== gRPC Endpoint Request ===\n") fmt.Printf("URL: %s\n", fullURL) - fmt.Printf("Body: %s\n", formData.Encode()) + fmt.Printf("f.req (raw JSON): %s\n", string(bodyJSON)) + fmt.Printf("Body (URL-encoded): %s\n", formData.Encode()) } // Send the request @@ -105,7 +111,51 @@ func (c *Client) Execute(req Request) ([]byte, error) { return nil, fmt.Errorf("request failed with status %d: %s", resp.StatusCode, string(body)) } - return body, nil + // Strip the )]}' prefix that Google adds to prevent JSON hijacking + bodyStr := string(body) 
+ if strings.HasPrefix(bodyStr, ")]}'") { + bodyStr = strings.TrimPrefix(bodyStr, ")]}'") + bodyStr = strings.TrimLeft(bodyStr, "\n") + } + + // Response is in chunked format: <length>\n<json>\n<length>\n... + // Extract the first JSON chunk which contains the actual response + lines := strings.SplitN(bodyStr, "\n", 3) + if len(lines) >= 2 { + // First line is the length, second line is the JSON + bodyStr = lines[1] + } + + // Parse the batchexecute response format: [["wrb.fr",null,"<json_data>",...]] + // We need to extract the json_data (third element) + var outerArray [][]interface{} + if err := json.Unmarshal([]byte(bodyStr), &outerArray); err != nil { + return nil, fmt.Errorf("parse outer response: %w", err) + } + + if len(outerArray) == 0 || len(outerArray[0]) < 3 { + return nil, fmt.Errorf("invalid response format: expected [['wrb.fr',null,'data',...]]") + } + + // The third element (index 2) contains the JSON string we need + dataStr, ok := outerArray[0][2].(string) + if !ok { + return nil, fmt.Errorf("invalid response data type: expected string") + } + + if c.debug { + fmt.Printf("=== gRPC Endpoint Response ===\n") + fmt.Printf("Extracted data: %s\n", dataStr[:min(300, len(dataStr))]) + } + + return []byte(dataStr), nil +} + +func min(a, b int) int { + if a < b { + return a + } + return b } // StreamResponse handles streaming responses from gRPC endpoints @@ -113,10 +163,13 @@ func (c *Client) Stream(req Request, handler func(chunk []byte) error) error { baseURL := "https://notebooklm.google.com/_/LabsTailwindUi/data" fullURL := baseURL + req.Endpoint + // Get API parameters dynamically + apiParams := rpc.GetAPIParams(c.cookies) + // Add query parameters params := url.Values{} - params.Set("bl", "boq_labs-tailwind-frontend_20250903.07_p0") - params.Set("f.sid", "-2216531235646590877") + params.Set("bl", apiParams.BuildVersion) + params.Set("f.sid", apiParams.SessionID) params.Set("hl", "en") params.Set("_reqid", fmt.Sprintf("%d", generateRequestID())) params.Set("rt", "c") @@ -188,25 
// BuildChatRequest builds the request body for the GenerateFreeFormStreamed
// endpoint.
//
// Browser format:
//
//	[null,"[[[[\"source_id\"]]],\"prompt\",null,[2,null,[1]]]"]
//
// The second element is a JSON document encoded as a string. Inside it the
// source IDs form one flat list wrapped in two further arrays; together with
// the enclosing inner array that gives four opening brackets.
//
// With no source IDs the list is encoded as an empty array ([]), never as
// null.
func BuildChatRequest(sourceIDs []string, prompt string) interface{} {
	// A non-nil slice is required here: encoding/json writes a nil slice as
	// null, which would put null where the wire format has an array.
	ids := make([]interface{}, 0, len(sourceIDs))
	for _, id := range sourceIDs {
		ids = append(ids, id)
	}

	// Inner array: [[[[sources]]], prompt, null, [2,null,[1]]]
	inner := []interface{}{
		[]interface{}{[]interface{}{ids}},
		prompt,
		nil,
		[]interface{}{2, nil, []interface{}{1}},
	}

	// The error is ignored on purpose: the value consists only of strings,
	// ints, nil and slices of those, none of which json.Marshal rejects.
	innerJSON, _ := json.Marshal(inner)

	// Final format: [null, "inner_json_string"]
	return []interface{}{
		nil,
		string(innerJSON),
	}
}
+var ( + cachedParams *APIParams + paramsMutex sync.Mutex +) + +// GetAPIParams returns API parameters, either from cache, env vars, or by fetching from NotebookLM +func GetAPIParams(cookies string) *APIParams { + paramsMutex.Lock() + defer paramsMutex.Unlock() + + // Return cached if available + if cachedParams != nil { + return cachedParams + } + + // Check environment variables first + bl := os.Getenv("NLM_BUILD_VERSION") + sid := os.Getenv("NLM_SESSION_ID") + + if bl != "" && sid != "" { + cachedParams = &APIParams{BuildVersion: bl, SessionID: sid} + return cachedParams + } + + // Try to fetch from NotebookLM page + if cookies != "" { + if params := fetchAPIParamsFromPage(cookies); params != nil { + cachedParams = params + return cachedParams + } + } + + // Fallback to defaults + cachedParams = &APIParams{ + BuildVersion: DefaultBuildVersion, + SessionID: DefaultSessionID, + } + return cachedParams +} + +// fetchAPIParamsFromPage extracts bl and f.sid from the NotebookLM HTML page +func fetchAPIParamsFromPage(cookies string) *APIParams { + req, err := http.NewRequest("GET", "https://notebooklm.google.com/", nil) + if err != nil { + return nil + } + + req.Header.Set("Cookie", cookies) + req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36") + + client := &http.Client{} + resp, err := client.Do(req) + if err != nil { + return nil + } + defer resp.Body.Close() + + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil + } + + html := string(body) + params := &APIParams{} + + // Extract build version (bl) - pattern: "cfb2h":"boq_labs-tailwind-frontend_..." 
+ blRegex := regexp.MustCompile(`"cfb2h":"(boq_labs-tailwind-frontend_[^"]+)"`) + if matches := blRegex.FindStringSubmatch(html); len(matches) > 1 { + params.BuildVersion = matches[1] + } + + // Extract session ID (f.sid) - pattern: "FdrFJe":"-1234567890" + sidRegex := regexp.MustCompile(`"FdrFJe":"(-?\d+)"`) + if matches := sidRegex.FindStringSubmatch(html); len(matches) > 1 { + params.SessionID = matches[1] + } + + // Also try alternative patterns if primary ones fail + if params.BuildVersion == "" { + // Try: bl=boq_labs... in script + blAltRegex := regexp.MustCompile(`bl['":\s=]+['"]?(boq_labs-tailwind-frontend_[^'"&\s]+)`) + if matches := blAltRegex.FindStringSubmatch(html); len(matches) > 1 { + params.BuildVersion = matches[1] + } + } + + if params.SessionID == "" { + // Try: f.sid= pattern + sidAltRegex := regexp.MustCompile(`f\.sid['":\s=]+['"]?(-?\d+)`) + if matches := sidAltRegex.FindStringSubmatch(html); len(matches) > 1 { + params.SessionID = matches[1] + } + } + + // Only return if we got at least one value + if params.BuildVersion != "" || params.SessionID != "" { + // Fill in defaults for missing values + if params.BuildVersion == "" { + params.BuildVersion = DefaultBuildVersion + } + if params.SessionID == "" { + params.SessionID = DefaultSessionID + } + if os.Getenv("NLM_DEBUG") != "" { + fmt.Printf("DEBUG: Extracted API params - bl: %s, f.sid: %s\n", + params.BuildVersion[:min(50, len(params.BuildVersion))], params.SessionID) + } + return params + } + + return nil +} + +func min(a, b int) int { + if a < b { + return a + } + return b +} + +// ClearAPIParamsCache clears the cached API parameters (useful for refresh) +func ClearAPIParamsCache() { + paramsMutex.Lock() + defer paramsMutex.Unlock() + cachedParams = nil +} + +// Helper to check if a string contains NotebookLM-related content +func isNotebookLMPage(html string) bool { + return strings.Contains(html, "notebooklm") || strings.Contains(html, "LabsTailwind") +} + // RPC endpoint IDs for 
NotebookLM services const ( // NotebookLM service - Project/Notebook operations @@ -17,6 +167,7 @@ const ( RPCDeleteProjects = "WWINqb" // DeleteProjects RPCMutateProject = "s0tc2d" // MutateProject RPCRemoveRecentlyViewed = "fejl7e" // RemoveRecentlyViewedProject + RPCRegisterBinarySource = "o4cbdc" // RegisterBinarySource (for file upload) // NotebookLM service - Source operations RPCAddSources = "izAoDd" // AddSources @@ -100,9 +251,11 @@ type Client struct { client *batchexecute.Client } -// New creates a new NotebookLM RPC client // New creates a new NotebookLM RPC client func New(authToken, cookies string, options ...batchexecute.Option) *Client { + // Get API parameters dynamically (from env, page extraction, or defaults) + params := GetAPIParams(cookies) + config := batchexecute.Config{ Host: "notebooklm.google.com", App: "LabsTailwindUi", @@ -119,12 +272,9 @@ func New(authToken, cookies string, options ...batchexecute.Option) *Client { "pragma": "no-cache", }, URLParams: map[string]string{ - // Update to January 2025 build version - "bl": "boq_labs-tailwind-frontend_20250129.00_p0", - "f.sid": "-7121977511756781186", + "bl": params.BuildVersion, + "f.sid": params.SessionID, "hl": "en", - // Omit rt parameter for JSON array format (easier to parse) - // "rt": "c", // Use "c" for chunked format, omit for JSON array }, } return &Client{