Skip to content

Commit 56d44da

Browse files
authored
Support other data types for DataViewer (#5093)
For #4677

Add code for other data types.
Add new functional tests for those data types.

<!-- If an item below does not apply to you, then go ahead and check it off as "done" and strikethrough the text, e.g.: - [x] ~Has unit tests & system/integration tests~ -->

- [x] Pull request represents a single change (i.e. not fixing disparate/unrelated things in a single PR)
- [x] Title summarizes what is changing
- [x] Has a [news entry](https://github.com/Microsoft/vscode-python/tree/master/news) file (remember to thank yourself!)
- [ ] Has sufficient logging.
- [ ] Has telemetry for enhancements.
- [x] Unit tests & system/integration tests are added/updated
- [ ] [Test plan](https://github.com/Microsoft/vscode-python/blob/master/.github/test_plan.md) is updated as appropriate
- [ ] [`package-lock.json`](https://github.com/Microsoft/vscode-python/blob/master/package-lock.json) has been regenerated by running `npm install` (if dependencies have changed)
- [ ] The wiki is updated with any design decisions/details.
1 parent db9706a commit 56d44da

27 files changed

Lines changed: 1080 additions & 851 deletions

news/1 Enhancements/4677.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
Add preliminary support for viewing dataframes.
1+
Add support for viewing dataframes, lists, dicts, and numpy ndarrays.

package-lock.json

Lines changed: 9 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2397,6 +2397,7 @@
23972397
"mocha-junit-reporter": "^1.17.0",
23982398
"mocha-multi-reporters": "^1.1.7",
23992399
"node-has-native-dependencies": "^1.0.2",
2400+
"node-html-parser": "^1.1.13",
24002401
"nyc": "^13.3.0",
24012402
"raw-loader": "^0.5.1",
24022403
"react": "^16.5.2",

package.nls.json

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,8 @@
226226
"DataScience.dataExplorerInvalidVariableFormat" : "'{0}' is not an active variable.",
227227
"DataScience.jupyterGetVariablesExecutionError" : "Failure during variable extraction:\r\n{0}",
228228
"DataScience.loadingMessage" : "loading ...",
229-
"DataScience.noRowsInDataExplorer" : "Fetching data ...",
229+
"DataScience.noRowsInDataViewer" : "Fetching data ...",
230230
"DataScience.pandasTooOldForViewingFormat" : "Python package 'pandas' is version {0}. Version 0.20 or greater is required for viewing data.",
231-
"DataScience.pandasRequiredForViewing" : "Python package 'pandas' is required for viewing data."
231+
"DataScience.pandasRequiredForViewing" : "Python package 'pandas' is required for viewing data.",
232+
"DataScience.valuesColumn": "values"
232233
}
Lines changed: 34 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,35 @@
11
# Query Jupyter server for the info about a dataframe
22
import json as _VSCODE_json
3+
import pandas as _VSCODE_pd
34

45
# In IJupyterVariables.getValue this '_VSCode_JupyterTestValue' will be replaced with the json stringified value of the target variable
56
# Indexes off of _VSCODE_targetVariable need to index types that are part of IJupyterVariable
67
_VSCODE_targetVariable = _VSCODE_json.loads('_VSCode_JupyterTestValue')
78
_VSCODE_evalResult = eval(_VSCODE_targetVariable['name'])
89

910
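# First list out the column types and names. A DataFrame is used as-is; a Series, dict, or ndarray is converted to a frame first; a list gets a single string-typed values column.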
10-
_VSCODE_columnTypes = list(_VSCODE_evalResult.dtypes)
11-
_VSCODE_columnNames = list(_VSCODE_evalResult)
11+
_VSCODE_columnTypes = []
12+
_VSCODE_columnNames = []
13+
if _VSCODE_targetVariable['type'] == 'list':
14+
_VSCODE_columnTypes = ['string'] # Might be able to be more specific here?
15+
_VSCODE_columnNames = ['_VSCode_JupyterValuesColumn']
16+
elif _VSCODE_targetVariable['type'] == 'Series':
17+
_VSCODE_evalResult = _VSCODE_pd.Series.to_frame(_VSCODE_evalResult)
18+
_VSCODE_columnTypes = list(_VSCODE_evalResult.dtypes)
19+
_VSCODE_columnNames = list(_VSCODE_evalResult)
20+
elif _VSCODE_targetVariable['type'] == 'dict':
21+
_VSCODE_evalResult = _VSCODE_pd.Series(_VSCODE_evalResult)
22+
_VSCODE_evalResult = _VSCODE_pd.Series.to_frame(_VSCODE_evalResult)
23+
_VSCODE_columnTypes = list(_VSCODE_evalResult.dtypes)
24+
_VSCODE_columnNames = list(_VSCODE_evalResult)
25+
elif _VSCODE_targetVariable['type'] == 'ndarray':
26+
_VSCODE_evalResult = _VSCODE_pd.Series(_VSCODE_evalResult)
27+
_VSCODE_evalResult = _VSCODE_pd.Series.to_frame(_VSCODE_evalResult)
28+
_VSCODE_columnTypes = list(_VSCODE_evalResult.dtypes)
29+
_VSCODE_columnNames = list(_VSCODE_evalResult)
30+
elif _VSCODE_targetVariable['type'] == 'DataFrame':
31+
_VSCODE_columnTypes = list(_VSCODE_evalResult.dtypes)
32+
_VSCODE_columnNames = list(_VSCODE_evalResult)
1233

1334
# Make sure we have an index column (see code in getJupyterVariableDataFrameRows.py)
1435
if 'index' not in _VSCODE_columnNames:
@@ -17,13 +38,13 @@
1738

1839
# Then loop and generate our output json
1940
_VSCODE_columns = []
20-
for n in range(0, len(_VSCODE_columnNames)):
21-
c = _VSCODE_columnNames[n]
22-
t = _VSCODE_columnTypes[n]
41+
for _VSCODE_n in range(0, len(_VSCODE_columnNames)):
42+
_VSCODE_column_name = _VSCODE_columnNames[_VSCODE_n]
43+
_VSCODE_column_type = _VSCODE_columnTypes[_VSCODE_n]
2344
_VSCODE_colobj = {}
24-
_VSCODE_colobj['key'] = c
25-
_VSCODE_colobj['name'] = c
26-
_VSCODE_colobj['type'] = str(t)
45+
_VSCODE_colobj['key'] = _VSCODE_column_name
46+
_VSCODE_colobj['name'] = _VSCODE_column_name
47+
_VSCODE_colobj['type'] = str(_VSCODE_column_type)
2748
_VSCODE_columns.append(_VSCODE_colobj)
2849

2950
del _VSCODE_columnNames
@@ -33,20 +54,13 @@
3354
_VSCODE_targetVariable['columns'] = _VSCODE_columns
3455
del _VSCODE_columns
3556

36-
# Figure out shape if not already there
37-
if 'shape' not in _VSCODE_targetVariable:
38-
_VSCODE_targetVariable['shape'] = str(_VSCODE_evalResult.shape)
39-
40-
# Row count is actually embedded in shape. Should be the second number
41-
import re as _VSCODE_re
42-
_VSCODE_regex = r"\(\s*(\d+),\s*(\d+)\s*\)"
43-
_VSCODE_matches = _VSCODE_re.search(_VSCODE_regex, _VSCODE_targetVariable['shape'])
44-
if (_VSCODE_matches):
45-
_VSCODE_targetVariable['rowCount'] = int(_VSCODE_matches[1])
46-
del _VSCODE_matches
57+
# Compute the row count: use shape[0] when the value has a shape, len() for a plain list, and 0 otherwise
58+
if (hasattr(_VSCODE_evalResult, "shape")):
59+
_VSCODE_targetVariable['rowCount'] = _VSCODE_evalResult.shape[0]
60+
elif _VSCODE_targetVariable['type'] == 'list':
61+
_VSCODE_targetVariable['rowCount'] = len(_VSCODE_evalResult)
4762
else:
4863
_VSCODE_targetVariable['rowCount'] = 0
49-
del _VSCODE_regex
5064

5165
# Transform this back into a string
5266
print(_VSCODE_json.dumps(_VSCODE_targetVariable))

pythonFiles/datascience/getJupyterVariableDataFrameRows.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# Query Jupyter server for the rows of a data frame
22
import json as _VSCODE_json
3+
import pandas as _VSCODE_pd
34
import pandas.io.json as _VSCODE_pd_json
45

56
# In IJupyterVariables.getValue this '_VSCode_JupyterTestValue' will be replaced with the json stringified value of the target variable
@@ -12,12 +13,29 @@
1213
_VSCODE_startRow = max(_VSCode_JupyterStartRow, 0)
1314
_VSCODE_endRow = min(_VSCode_JupyterEndRow, _VSCODE_targetVariable['rowCount'])
1415

16+
# Assume we have a dataframe. If not, turn our eval result into a dataframe
17+
_VSCODE_df = _VSCODE_evalResult
18+
if (_VSCODE_targetVariable['type'] == 'list'):
19+
_VSCODE_df = _VSCODE_pd.DataFrame({'_VSCode_JupyterValuesColumn':_VSCODE_evalResult})
20+
elif (_VSCODE_targetVariable['type'] == 'Series'):
21+
_VSCODE_df = _VSCODE_pd.Series.to_frame(_VSCODE_evalResult)
22+
elif _VSCODE_targetVariable['type'] == 'dict':
23+
_VSCODE_evalResult = _VSCODE_pd.Series(_VSCODE_evalResult)
24+
_VSCODE_df = _VSCODE_pd.Series.to_frame(_VSCODE_evalResult)
25+
elif _VSCODE_targetVariable['type'] == 'ndarray':
26+
_VSCODE_evalResult = _VSCODE_pd.Series(_VSCODE_evalResult)
27+
_VSCODE_df = _VSCODE_pd.Series.to_frame(_VSCODE_evalResult)
28+
# If not a known type, then just let pandas handle it.
29+
elif not (hasattr(_VSCODE_df, 'iloc')):
30+
_VSCODE_df = _VSCODE_pd.DataFrame(_VSCODE_evalResult)
31+
1532
# Turn into JSON using pandas. We use pandas because it's about 3 orders of magnitude faster to turn into JSON
16-
_VSCODE_rows = df.iloc[_VSCODE_startRow:_VSCODE_endRow]
33+
_VSCODE_rows = _VSCODE_df.iloc[_VSCODE_startRow:_VSCODE_endRow]
1734
_VSCODE_result = _VSCODE_pd_json.to_json(None, _VSCODE_rows, orient='table', date_format='iso')
1835
print(_VSCODE_result)
1936

2037
# Cleanup our variables
38+
del _VSCODE_df
2139
del _VSCODE_endRow
2240
del _VSCODE_startRow
2341
del _VSCODE_rows

0 commit comments

Comments
 (0)