Skip to content

Commit a12df2e

Browse files
kinto0 authored and facebook-github-bot committed
.ipynb support (facebook#1333)
Summary: facebook#381 Differential Revision: D84929386
1 parent 36f47bf commit a12df2e

File tree

8 files changed

+266
-30
lines changed

8 files changed

+266
-30
lines changed

crates/pyrefly_python/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,10 @@ ruff_python_ast = { git = "https://github.com/astral-sh/ruff/", rev = "9bee8376a
2222
ruff_python_parser = { git = "https://github.com/astral-sh/ruff/", rev = "9bee8376a17401f9736b45fdefffb62edc2f1668" }
2323
ruff_text_size = { git = "https://github.com/astral-sh/ruff/", rev = "9bee8376a17401f9736b45fdefffb62edc2f1668" }
2424
serde = { version = "1.0.219", features = ["derive", "rc"] }
25+
serde_json = { version = "1.0.140", features = ["alloc", "float_roundtrip", "raw_value", "unbounded_depth"] }
2526
starlark_map = "0.13.0"
2627
static_interner = "0.1.1"
2728
thiserror = "2.0.12"
2829

2930
[dev-dependencies]
30-
serde_json = { version = "1.0.140", features = ["alloc", "float_roundtrip", "raw_value", "unbounded_depth"] }
3131
toml = { version = "0.9.2", features = ["preserve_order"] }

crates/pyrefly_python/src/lib.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,14 @@ pub mod module;
2828
pub mod module_name;
2929
pub mod module_path;
3030
pub mod nesting_context;
31+
pub mod notebook;
3132
pub mod qname;
3233
pub mod short_identifier;
3334
pub mod symbol_kind;
3435
pub mod sys_info;
3536

3637
/// Suffixes of Python files that can be processed.
37-
pub const PYTHON_EXTENSIONS: &[&str] = &["py", "pyi"];
38+
pub const PYTHON_EXTENSIONS: &[&str] = &["py", "pyi", "ipynb"];
3839

3940
/// Suffixes of compiled python modules
4041
pub const COMPILED_FILE_SUFFIXES: &[&str] = &["pyc", "pyx", "pyd"];

crates/pyrefly_python/src/module_name.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,7 @@ impl ModuleName {
204204
None => {}
205205
Some(file_name) => {
206206
let splits: Vec<&str> = file_name.rsplitn(2, '.').collect();
207-
if splits.len() != 2 || !(splits[0] == "py" || splits[0] == "pyi") {
207+
if splits.len() != 2 || !(splits[0] == "py" || splits[0] == "pyi" || splits[0] == "ipynb") {
208208
return Err(anyhow::anyhow!(PathConversionError::InvalidExtension {
209209
file_name: file_name.to_owned(),
210210
}));
@@ -407,8 +407,10 @@ mod tests {
407407
}
408408
assert_module_name("foo.py", "foo");
409409
assert_module_name("foo.pyi", "foo");
410+
assert_module_name("foo.ipynb", "foo");
410411
assert_module_name("foo/bar.py", "foo.bar");
411412
assert_module_name("foo/bar.pyi", "foo.bar");
413+
assert_module_name("foo/bar.ipynb", "foo.bar");
412414
assert_module_name("foo/bar/__init__.py", "foo.bar");
413415
assert_module_name("foo/bar/__init__.pyi", "foo.bar");
414416

crates/pyrefly_python/src/module_path.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ impl ModuleStyle {
6464
if path.extension() == Some("pyi".as_ref()) {
6565
ModuleStyle::Interface
6666
} else {
67+
// Both .py and .ipynb are executable
6768
ModuleStyle::Executable
6869
}
6970
}
crates/pyrefly_python/src/notebook.rs

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
*
4+
* This source code is licensed under the MIT license found in the
5+
* LICENSE file in the root directory of this source tree.
6+
*/
7+
8+
// Jupyter notebook parsing support.
9+
//
10+
// This module extracts Python code from `.ipynb` files (Jupyter notebooks)
11+
// and converts them into a single Python source file that can be analyzed
12+
// by the type checker.
13+
14+
use anyhow::Context;
15+
use anyhow::Result;
16+
use serde::Deserialize;
17+
18+
/// Represents a Jupyter notebook cell
///
/// Only the two keys that `extract_python_from_notebook` reads are declared.
/// No `deny_unknown_fields` attribute is set, so serde's default handling of
/// undeclared keys applies.
19+
#[derive(Debug, Deserialize)]
20+
struct NotebookCell {
21+
/// Cell kind as written in the notebook JSON; the extractor only keeps
/// cells where this equals `"code"`.
cell_type: String,
22+
/// Text of the cell, in either of the two shapes `NotebookSource` accepts.
source: NotebookSource,
23+
}
24+
25+
/// Source can be either a string or an array of strings
///
/// `#[serde(untagged)]` means there is no discriminator key in the JSON: the
/// variants are tried in declaration order and the first one whose shape
/// matches the value is used.
26+
#[derive(Debug, Deserialize)]
27+
#[serde(untagged)]
28+
enum NotebookSource {
29+
/// The whole cell text as a single JSON string.
String(String),
30+
/// The cell text as a JSON array of string fragments; the extractor
/// concatenates them in order without inserting anything between them.
Array(Vec<String>),
31+
}
32+
33+
/// Minimal representation of a Jupyter notebook
///
/// Only the top-level `cells` key is declared; it is required, so JSON
/// without it fails to deserialize into this type.
34+
#[derive(Debug, Deserialize)]
35+
struct Notebook {
36+
/// Every cell of the notebook, in document order (code and non-code alike).
cells: Vec<NotebookCell>,
37+
}
38+
39+
/// Extracts Python code from a Jupyter notebook JSON string.
40+
///
41+
/// This function:
42+
/// - Parses the notebook JSON
43+
/// - Extracts only code cells (ignoring markdown cells)
44+
/// - Concatenates all code into a single Python source string
45+
/// - Adds cell markers as comments for debugging
46+
///
47+
/// # Arguments
48+
/// * `content` - The raw JSON content of a `.ipynb` file
49+
///
50+
/// # Returns
51+
/// A single Python source string containing all code cells, or an error if parsing fails
52+
pub fn extract_python_from_notebook(content: &str) -> Result<String> {
53+
let notebook: Notebook =
54+
serde_json::from_str(content).context("Failed to parse notebook JSON")?;
55+
56+
let mut python_code = String::new();
57+
let mut code_cell_count = 0;
58+
59+
for cell in notebook.cells.iter() {
60+
if cell.cell_type == "code" {
61+
code_cell_count += 1;
62+
// Add a comment marker for each cell
63+
python_code.push_str(&format!("# Cell {}\n", code_cell_count));
64+
65+
// Extract the source code
66+
match &cell.source {
67+
NotebookSource::String(s) => {
68+
python_code.push_str(s.as_str());
69+
}
70+
NotebookSource::Array(lines) => {
71+
for line in lines {
72+
python_code.push_str(line.as_str());
73+
}
74+
}
75+
}
76+
77+
// Add spacing between cells
78+
if !python_code.ends_with('\n') {
79+
python_code.push('\n');
80+
}
81+
python_code.push('\n');
82+
}
83+
}
84+
85+
Ok(python_code)
86+
}
87+
88+
#[cfg(test)]
mod tests {
    use super::*;

    // Code cells (array-form and string-form) are emitted with numbered
    // markers, and the markdown cell between them is dropped.
    #[test]
    fn test_extract_python_from_notebook() {
        let input = r##"{
            "cells": [
                {
                    "cell_type": "code",
                    "source": ["def hello():\n", " return 'world'\n"]
                },
                {
                    "cell_type": "markdown",
                    "source": ["This is a markdown cell"]
                },
                {
                    "cell_type": "code",
                    "source": "x = 5"
                }
            ]
        }"##;

        let extracted = extract_python_from_notebook(input).unwrap();

        let must_appear = [
            "# Cell 1",
            "def hello():",
            "return 'world'",
            "# Cell 2",
            "x = 5",
        ];
        for needle in must_appear {
            assert!(extracted.contains(needle));
        }
        assert!(!extracted.contains("This is a markdown cell"));
    }

    // A notebook with no cells yields an empty source string.
    #[test]
    fn test_extract_python_from_empty_notebook() {
        let extracted = extract_python_from_notebook(r#"{"cells": []}"#).unwrap();
        assert_eq!(extracted, "");
    }

    // Input that is not JSON at all is reported as an error, not a panic.
    #[test]
    fn test_extract_python_with_invalid_json() {
        let outcome = extract_python_from_notebook("not valid json");
        assert!(outcome.is_err());
    }
}

lsp/package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,8 @@
3333
},
3434
"main": "./dist/extension",
3535
"activationEvents": [
36-
"onLanguage:python"
36+
"onLanguage:python",
37+
"onNotebook:jupyter-notebook"
3738
],
3839
"contributes": {
3940
"languages": [

lsp/src/extension.ts

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,8 @@ async function updateStatusBar() {
4141
const document = vscode.window.activeTextEditor?.document;
4242
if (
4343
document == null ||
44-
document.uri.scheme !== 'file' ||
44+
(document.uri.scheme !== 'file' &&
45+
document.uri.scheme !== 'vscode-notebook-cell') ||
4546
document.languageId !== 'python'
4647
) {
4748
statusBarItem?.hide();
@@ -180,8 +181,21 @@ export async function activate(context: ExtensionContext) {
180181
// Options to control the language client
181182
let clientOptions: LanguageClientOptions = {
182183
initializationOptions: rawInitialisationOptions,
183-
// Register the server for Starlark documents
184-
documentSelector: [{scheme: 'file', language: 'python'}],
184+
// Register the server for Python documents
185+
documentSelector: [
186+
{scheme: 'file', language: 'python'},
187+
// Support for notebook cells
188+
{scheme: 'vscode-notebook-cell', language: 'python'},
189+
],
190+
// Support for notebooks
191+
notebookDocumentSync: {
192+
notebookSelector: [
193+
{
194+
notebook: {notebookType: 'jupyter-notebook'},
195+
cells: [{language: 'python'}],
196+
},
197+
],
198+
},
185199
outputChannel: outputChannel,
186200
middleware: {
187201
workspace: {

0 commit comments

Comments
 (0)