Skip to content

Commit

Permalink
jupyter notebook support - cli
Browse files Browse the repository at this point in the history
  • Loading branch information
DetachHead committed Feb 2, 2025
1 parent b7660d4 commit 6e07d45
Show file tree
Hide file tree
Showing 5 changed files with 212 additions and 72 deletions.
25 changes: 25 additions & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions packages/pyright-internal/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
},
"dependencies": {
"@actions/core": "^1.10.1",
"@jupyterlab/nbformat": "^4.3.5",
"@yarnpkg/fslib": "2.10.4",
"@yarnpkg/libzip": "2.3.0",
"chalk": "^4.1.2",
Expand Down
124 changes: 84 additions & 40 deletions packages/pyright-internal/src/analyzer/program.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ import { ImportResult, ImportType } from './importResult';
import { getDocString } from './parseTreeUtils';
import { ISourceFileFactory } from './programTypes';
import { Scope } from './scope';
import { IPythonMode, SourceFile } from './sourceFile';
import { getIPythonCells, IPythonMode, SourceFile } from './sourceFile';
import { SourceFileInfo } from './sourceFileInfo';
import { createChainedByList, isUserCode, verifyNoCyclesInChainedFiles } from './sourceFileInfoUtils';
import { SourceMapper } from './sourceMapper';
Expand Down Expand Up @@ -332,47 +332,73 @@ export class Program {
return fileInfo;
}

// NOTE(review): this span is a rendered diff hunk. Lines of the previous implementation
// (multi-line signature returning SourceFile) and of the new implementation (single-line
// signature, no return value) appear together without +/- markers.
addTrackedFile(
fileUri: Uri,
isThirdPartyImport = false,
isInPyTypedPackage = false,
isTypeshedFile = false
): SourceFile {
let sourceFileInfo = this.getSourceFileInfo(fileUri);
const moduleImportInfo = this._getModuleImportInfoForFile(fileUri);
const importName = moduleImportInfo.moduleName;

if (sourceFileInfo) {
// The module name may have changed based on updates to the
// search paths, so update it here.
sourceFileInfo.sourceFile.setModuleName(importName);
sourceFileInfo.isTracked = true;
return sourceFileInfo.sourceFile;
/**
 * Marks the file at `fileUri` as tracked, creating source file entries for it as needed.
 *
 * When `getIPythonCells` returns a list of cells, one source file is created per cell. Each cell is
 * addressed by `fileUri` with the cell's index as the uri fragment, is created in
 * `IPythonMode.CellDocs`, and is chained to the entry created before it. Otherwise a single source
 * file is created for `fileUri` itself.
 *
 * Uris for which `_getImportNameForNewSourceFile` returns `undefined` get no new entry.
 */
addTrackedFile(fileUri: Uri, isThirdPartyImport = false, isInPyTypedPackage = false, isTypeshedFile = false) {
const cells = getIPythonCells(this.fileSystem, fileUri, this.console);
// Entries are collected first and only registered with the program at the very end.
const sourceFileInfos: SourceFileInfo[] = [];
if (cells) {
cells.forEach((_, index) => {
// The cell's position in the list returned by getIPythonCells becomes the uri fragment.
const cellUri = fileUri.withFragment(index.toString());
const importName = this._getImportNameForNewSourceFile(cellUri);
if (!importName) {
return; // continue
}
const sourceFile = this._sourceFileFactory.createSourceFile(
this.serviceProvider,
cellUri,
importName,
isThirdPartyImport,
isInPyTypedPackage,
this.baselineHandler,
this._editModeTracker,
this._console,
this._logTracker,
IPythonMode.CellDocs
);
sourceFileInfos.push(
new SourceFileInfo(
sourceFile,
isTypeshedFile,
isThirdPartyImport,
isInPyTypedPackage,
this._editModeTracker,
{
isTracked: true,
// For the first cell `index - 1` is -1, so the lookup yields undefined (no chained file).
// NOTE(review): `index` counts every cell, while `sourceFileInfos` only receives cells that
// were not skipped above; after a skip, `index - 1` no longer addresses the previous cell's
// entry - confirm whether the chain should use the last pushed entry instead.
chainedSourceFile: sourceFileInfos[index - 1],
}
)
);
});
} else {
// Not a notebook (getIPythonCells returned undefined): track the file as a single source file.
const importName = this._getImportNameForNewSourceFile(fileUri);
if (!importName) {
// No new entry is needed for this uri.
return;
}
const sourceFile = this._sourceFileFactory.createSourceFile(
this.serviceProvider,
fileUri,
importName,
isThirdPartyImport,
isInPyTypedPackage,
this.baselineHandler,
this._editModeTracker,
this._console,
this._logTracker
);
sourceFileInfos.push(
new SourceFileInfo(
sourceFile,
isTypeshedFile,
isThirdPartyImport,
isInPyTypedPackage,
this._editModeTracker,
{
isTracked: true,
}
)
);
}

const sourceFile = this._sourceFileFactory.createSourceFile(
this.serviceProvider,
fileUri,
importName,
isThirdPartyImport,
isInPyTypedPackage,
this.baselineHandler,
this._editModeTracker,
this._console,
this._logTracker
);
sourceFileInfo = new SourceFileInfo(
sourceFile,
isTypeshedFile,
isThirdPartyImport,
isInPyTypedPackage,
this._editModeTracker,
{
isTracked: true,
}
);
this._addToSourceFileListAndMap(sourceFileInfo);
return sourceFile;
// Register every entry created above, in creation order.
sourceFileInfos.forEach((sourceFileInfo) => this._addToSourceFileListAndMap(sourceFileInfo));
}

setFileOpened(fileUri: Uri, version: number | null, contents: string, options?: OpenFileOptions) {
Expand Down Expand Up @@ -1027,6 +1053,24 @@ export class Program {
this.serviceProvider.tryGet(ServiceKeys.stateMutationListeners)?.forEach((l) => l.onClearCache?.());
}

/**
 * Works out the module name to use when creating a source file for `fileUri`.
 *
 * Side effect: when the program already has an entry for `fileUri`, that entry's module name is
 * refreshed and the entry is flagged as tracked.
 *
 * @param fileUri uri of the file (or notebook cell) about to be tracked
 * @returns the module name for a new source file, or `undefined` if the source file is already tracked
 */
private _getImportNameForNewSourceFile = (fileUri: Uri): string | undefined => {
    const existingInfo = this.getSourceFileInfo(fileUri);
    const { moduleName } = this._getModuleImportInfoForFile(fileUri);

    if (!existingInfo) {
        return moduleName;
    }

    // Search path updates may have changed the module name since the
    // entry was created, so bring the existing entry up to date.
    existingInfo.sourceFile.setModuleName(moduleName);
    existingInfo.isTracked = true;
    return undefined;
};

private _handleMemoryHighUsage() {
const cacheUsage = this._cacheManager.getCacheUsage();
const usedHeapRatio = this._cacheManager.getUsedHeapRatio(
Expand Down
105 changes: 82 additions & 23 deletions packages/pyright-internal/src/analyzer/sourceFile.ts
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ import { SymbolTable } from './symbol';
import { TestWalker } from './testWalker';
import { TypeEvaluator } from './typeEvaluatorTypes';
import { BaselineHandler } from '../baseline';
import { INotebookContent } from '@jupyterlab/nbformat';

// Limit the number of import cycles tracked per source file.
const _maxImportCyclesPerFile = 4;
Expand All @@ -77,6 +78,60 @@ export enum IPythonMode {
CellDocs,
}

/**
 * Reads the text of a file from the file system. For a notebook this is the whole notebook json
 * exactly as it appears in the file, not the source of an individual cell.
 *
 * @returns the file's text read as utf8, or `undefined` when the file is larger than
 * `maxSourceFileSize` (an error is logged in that case) or when the stat/read call throws
 */
const getFileContent = (fileSystem: FileSystem, uri: Uri, console: ConsoleInterface): string | undefined => {
    try {
        // Look at the size first so an oversized file is never read in full.
        const { size } = fileSystem.statSync(uri);
        if (size > maxSourceFileSize) {
            console.error(
                `File length of "${uri}" is ${size} ` +
                    `which exceeds the maximum supported file size of ${maxSourceFileSize}`
            );
            return undefined;
        }

        return fileSystem.readFileSync(uri, 'utf8');
    } catch (error) {
        // Any failure while inspecting or reading the file is reported as "no content".
        return undefined;
    }
};

/**
 * If this is a notebook, gets the cells in this file that contain python code.
 * Note that each {@link SourceFile} is an individual cell, but this function returns
 * all of them.
 *
 * A cell is kept when its `cell_type` is `'code'` and it is not explicitly tagged with a
 * non-python language (see the comment on the filter below).
 *
 * @param fileSystem file system used to read the notebook
 * @param uri uri of the file; only uris with the `.ipynb` extension are treated as notebooks
 * @param console receives an error message when the notebook cannot be parsed
 * @returns `undefined` if not a notebook or if its content is empty or could not be read;
 * an empty list if the file is not valid notebook json
 */
export const getIPythonCells = (
    fileSystem: FileSystem,
    uri: Uri,
    console: ConsoleInterface
): INotebookContent['cells'] | undefined => {
    if (!uri.hasExtension('.ipynb')) {
        return undefined;
    }
    const fileContent = getFileContent(fileSystem, uri, console);
    if (!fileContent) {
        return undefined;
    }

    let parsedNotebook: INotebookContent;
    try {
        parsedNotebook = JSON.parse(fileContent) as INotebookContent;
    } catch (error) {
        // A corrupt notebook should not take down the whole run. Report it and treat it as a
        // notebook without code cells, so the json is never analyzed as if it were python.
        console.error(
            `Failed to parse notebook "${uri}": ${error instanceof Error ? error.message : String(error)}`
        );
        return [];
    }
    if (!Array.isArray(parsedNotebook?.cells)) {
        console.error(`Notebook "${uri}" does not contain a list of cells`);
        return [];
    }

    return parsedNotebook.cells.filter(
        (cell) =>
            cell.cell_type === 'code' &&
            // There appears to be no standard way to specify the language of an individual cell, so we
            // check the metadata vscode adds to cells whose language differs from the notebook's language.
            // Cells without that metadata are assumed to be python; cells with it are kept only when the
            // language is python.
            (typeof cell.metadata.vscode !== 'object' ||
                cell.metadata.vscode === null ||
                !('languageId' in cell.metadata.vscode) ||
                cell.metadata.vscode.languageId === 'python')
    );
};

/**
 * Gets the cell index from a notebook uri. The uri must have a fragment containing the index.
 * Not used by the language server because it has its own fake notebook uri format.
 *
 * The fragment is converted with `Number()`, so an empty fragment gives `0` and a non-numeric
 * fragment gives `NaN`.
 */
export const getCellIndex = (uri: Uri): number => {
    const { fragment } = uri;
    return Number(fragment);
};

// A monotonically increasing number used to create unique file IDs.
let nextUniqueFileId = 1;

Expand Down Expand Up @@ -403,8 +458,9 @@ export class SourceFile {
// that of the previous contents.
try {
// Read the file's contents.
if (this.fileSystem.existsSync(this._uri)) {
const fileContents = this.fileSystem.readFileSync(this._uri, 'utf8');
const uri = this._getRealUri();
if (this.fileSystem.existsSync(uri)) {
const fileContents = this.fileSystem.readFileSync(uri, 'utf8');

if (fileContents.length !== this._writableData.lastFileContentLength) {
return true;
Expand Down Expand Up @@ -483,29 +539,23 @@ export class SourceFile {
return this._writableData.clientDocumentContents;
}

/**
* gets the content of the source file. if it's a notebook, the content of this source file's {@link _ipythonCellIndex} is returned
*/
// NOTE(review): this span is a rendered diff hunk. Lines of the previous implementation (inline
// stat/read inside a try block) and of the new implementation (IPythonMode check delegating to
// the module-level getFileContent helper) appear together without +/- markers.
getFileContent(): string | undefined {
// Get current buffer content if the file is opened.
const openFileContent = this.getOpenFileContents();
if (openFileContent !== undefined) {
return openFileContent;
}

// Otherwise, get content from file system.
try {
// Check the file's length before attempting to read its full contents.
const fileStat = this.fileSystem.statSync(this._uri);
if (fileStat.size > maxSourceFileSize) {
this._console.error(
`File length of "${this._uri}" is ${fileStat.size} ` +
`which exceeds the maximum supported file size of ${maxSourceFileSize}`
);
throw new Error('File larger than max');
// Regular (non-notebook) source file: prefer the open buffer, then fall back to the file system.
if (this._ipythonMode === IPythonMode.None) {
// Get current buffer content if the file is opened.
const openFileContent = this.getOpenFileContents();
if (openFileContent !== undefined) {
return openFileContent;
}

return this.fileSystem.readFileSync(this._uri, 'utf8');
} catch (error) {
return undefined;
// Otherwise, get content from file system.
return getFileContent(this.fileSystem, this._uri, this._console);
}
// Notebook cell: the notebook is read through _getRealUri(), and the cell is picked out of the
// list returned by getIPythonCells using the index stored in this._uri's fragment (getCellIndex).
//TODO: this isnt ideal because it re-reads the file for each cell which is unnecessary
// NOTE(review): the `?.` only guards getIPythonCells returning undefined. If the index is out of
// range the element lookup yields undefined and `.source` throws - confirm whether `?.source`
// was intended.
const source = getIPythonCells(this.fileSystem, this._getRealUri(), this._console)?.[getCellIndex(this._uri)]
.source;
// A cell's source is either one string or a list of strings, which are joined with no separator.
return typeof source === 'string' ? source : source?.join('');
}

setClientVersion(version: number | null, contents: string): void {
Expand Down Expand Up @@ -673,7 +723,7 @@ export class SourceFile {
configOptions,
this._uri,
fileContents!,
this._ipythonMode,
this.getIPythonMode(),
diagSink
);

Expand Down Expand Up @@ -965,6 +1015,15 @@ export class SourceFile {
return new TextRangeDiagnosticSink(lines);
}

/**
 * Gets the uri to use when accessing this source file on the file system.
 *
 * If this source file represents an ipython cell and we're in the cli instead of the language
 * server, its uri carries a fragment to tell the cells of one notebook apart. That fragment is
 * replaced with an empty one here so the uri refers to the notebook file itself.
 *
 * @returns `this._uri` unchanged when client document contents are present (an empty string
 * counts as not present), otherwise `this._uri` with an empty fragment
 */
private _getRealUri = () => {
    // When using the language server, a fake uri is assigned automatically and this method *should*
    // never get called because we don't read the files from the disk directly, but we check this
    // anyway just in case.
    if (this._writableData.clientDocumentContents) {
        return this._uri;
    }
    return this._uri.withFragment('');
};

// Creates a short string that can be used to uniquely identify
// this file from all other files. It is used in the type evaluator
// to distinguish between types that are defined in different files
Expand Down
Loading

0 comments on commit 6e07d45

Please sign in to comment.