[go: nahoru, domu]

Skip to content

Commit

Permalink
docs: correct the params rendering for ml.remote and ml.ensemble
Browse files Browse the repository at this point in the history
…modules (#248)

Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:
- [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code!  That way we can discuss the change, evaluate designs, and agree on the general idea
- [ ] Ensure the tests and linter pass
- [ ] Code coverage does not decrease (if any source code was changed)
- [x] Appropriate docs were updated (if necessary)
  - `ensemble.RandomForestClassifier`: https://screenshot.googleplex.com/4Q88xgdm5hkaYXu
  - `ensemble.RandomForestRegressor`: https://screenshot.googleplex.com/3CU6pJBjYHQvnDo
  - `remote.VertexAIModel`: https://screenshot.googleplex.com/8SL2max6GfPMwFe

Fixes internal issue 314150462 🦕
  • Loading branch information
ashleyxuu committed Dec 5, 2023
1 parent 8d81e24 commit c2829e3
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 46 deletions.
8 changes: 4 additions & 4 deletions bigframes/ml/remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,10 @@ class VertexAIModel(base.BaseEstimator):
Args:
endpoint (str):
Vertex AI https endpoint.
input ({column_name: column_type}):
Input schema. Supported types are "bool", "string", "int64", "float64", "array<bool>", "array<string>", "array<int64>", "array<float64>".
output ({column_name: column_type}):
Output label schema. Supported the same types as the input.
input (Mapping):
Input schema: `{column_name: column_type}`. Supported types are "bool", "string", "int64", "float64", "array<bool>", "array<string>", "array<int64>", "array<float64>".
output (Mapping):
Output label schema: `{column_name: column_type}`. Supports the same types as the input.
session (bigframes.Session or None):
BQ session to create the model. If None, use the global default session.
connection_name (str or None):
Expand Down
12 changes: 6 additions & 6 deletions docs/templates/toc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -108,12 +108,6 @@
- name: PaLM2TextEmbeddingGenerator
uid: bigframes.ml.llm.PaLM2TextEmbeddingGenerator
name: llm
- items:
- name: Overview
uid: bigframes.ml.remote
- name: VertexAIModel
uid: bigframes.ml.remote.VertexAIModel
name: remote
- items:
- name: metrics
uid: bigframes.ml.metrics
Expand Down Expand Up @@ -144,6 +138,12 @@
- name: OneHotEncoder
uid: bigframes.ml.preprocessing.OneHotEncoder
name: preprocessing
- items:
- name: Overview
uid: bigframes.ml.remote
- name: VertexAIModel
uid: bigframes.ml.remote.VertexAIModel
name: remote
name: bigframes.ml
name: BigQuery DataFrames
status: beta
72 changes: 36 additions & 36 deletions third_party/bigframes_vendored/sklearn/ensemble/_forest.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,16 +47,16 @@ def fit(self, X, y):
"""Build a forest of trees from the training set (X, y).
Args:
X:
X (bigframes.dataframe.DataFrame or bigframes.series.Series):
Series or DataFrame of shape (n_samples, n_features). Training data.
y:
y (bigframes.dataframe.DataFrame or bigframes.series.Series):
Series or DataFrame of shape (n_samples,) or (n_samples, n_targets).
Target values. Will be cast to X's dtype if necessary.
Returns:
Fitted Estimator.
ForestModel: Fitted Estimator.
"""
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

Expand All @@ -73,12 +73,12 @@ def predict(self, X):
mean predicted regression targets of the trees in the forest.
Args:
X:
X (bigframes.dataframe.DataFrame or bigframes.series.Series):
Series or DataFrame of shape (n_samples, n_features). The data matrix for
which we want to get the predictions.
Returns:
The predicted values.
bigframes.dataframe.DataFrame: The predicted values.
"""
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

Expand All @@ -91,38 +91,38 @@ class RandomForestRegressor(ForestRegressor):
to improve the predictive accuracy and control over-fitting.
Args:
num_parallel_tree: Optional[int]
num_parallel_tree (Optional[int]):
Number of parallel trees constructed during each iteration. Default to 100. Minimum value is 2.
tree_method: Optional[str]
tree_method (Optional[str]):
Specify which tree method to use. Default to "auto". If this parameter is set to
default, XGBoost will choose the most conservative option available. Possible values: "exact", "approx",
"hist".
min_child_weight : Optional[float]
min_child_weight (Optional[float]):
Minimum sum of instance weight(hessian) needed in a child. Default to 1.
colsample_bytree : Optional[float]
colsample_bytree (Optional[float]):
Subsample ratio of columns when constructing each tree. Default to 1.0. The value should be between 0 and 1.
colsample_bylevel : Optional[float]
colsample_bylevel (Optional[float]):
Subsample ratio of columns for each level. Default to 1.0. The value should be between 0 and 1.
colsample_bynode : Optional[float]
colsample_bynode (Optional[float]):
Subsample ratio of columns for each split. Default to 0.8. The value should be between 0 and 1.
gamma : Optional[float]
gamma (Optional[float]):
(min_split_loss) Minimum loss reduction required to make a further partition on a
leaf node of the tree. Default to 0.0.
max_depth : Optional[int]
max_depth (Optional[int]):
Maximum tree depth for base learners. Default to 15. The value should be greater than 0 and less than 1.
subsample : Optional[float]
subsample (Optional[float]):
Subsample ratio of the training instance. Default to 0.8. The value should be greater than 0 and less than 1.
reg_alpha : Optional[float]
reg_alpha (Optional[float]):
L1 regularization term on weights (xgb's alpha). Default to 0.0.
reg_lambda : Optional[float]
reg_lambda (Optional[float]):
L2 regularization term on weights (xgb's lambda). Default to 1.0.
early_stop: Optional[bool]
early_stop (Optional[bool]):
Whether training should stop after the first iteration. Default to True.
min_rel_progress: Optional[float]
min_rel_progress (Optional[float]):
Minimum relative loss improvement necessary to continue training when early_stop is set to True. Default to 0.01.
enable_global_explain: Optional[bool]
enable_global_explain (Optional[bool]):
Whether to compute global explanations using explainable AI to evaluate global feature importance to the model. Default to False.
xgboost_version: Optional[str]
xgboost_version (Optional[str]):
Specifies the Xgboost version for model training. Default to "0.9". Possible values: "0.9", "1.1".
"""

Expand All @@ -144,7 +144,7 @@ def predict(self, X):
which we want to get the predictions.
Returns:
The predicted values.
bigframes.dataframe.DataFrame: The predicted values.
"""
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

Expand All @@ -158,37 +158,37 @@ class RandomForestClassifier(ForestClassifier):
improve the predictive accuracy and control over-fitting.
Args:
num_parallel_tree: Optional[int]
num_parallel_tree (Optional[int]):
Number of parallel trees constructed during each iteration. Default to 100. Minimum value is 2.
tree_method: Optional[str]
tree_method (Optional[str]):
Specify which tree method to use. Default to "auto". If this parameter is set to
default, XGBoost will choose the most conservative option available. Possible values: "exact", "approx",
"hist".
min_child_weight : Optional[float]
min_child_weight (Optional[float]):
Minimum sum of instance weight(hessian) needed in a child. Default to 1.
colsample_bytree : Optional[float]
colsample_bytree (Optional[float]):
Subsample ratio of columns when constructing each tree. Default to 1.0. The value should be between 0 and 1.
colsample_bylevel : Optional[float]
colsample_bylevel (Optional[float]):
Subsample ratio of columns for each level. Default to 1.0. The value should be between 0 and 1.
colsample_bynode : Optional[float]
colsample_bynode (Optional[float]):
Subsample ratio of columns for each split. Default to 0.8. The value should be between 0 and 1.
gamma : Optional[float]
gamma (Optional[float]):
(min_split_loss) Minimum loss reduction required to make a further partition on a
leaf node of the tree. Default to 0.0.
max_depth : Optional[int]
max_depth (Optional[int]):
Maximum tree depth for base learners. Default to 15. The value should be greater than 0 and less than 1.
subsample : Optional[float]
subsample (Optional[float]):
Subsample ratio of the training instance. Default to 0.8. The value should be greater than 0 and less than 1.
reg_alpha : Optional[float]
reg_alpha (Optional[float]):
L1 regularization term on weights (xgb's alpha). Default to 0.0.
reg_lambda : Optional[float]
reg_lambda (Optional[float]):
L2 regularization term on weights (xgb's lambda). Default to 1.0.
early_stop: Optional[bool]
early_stop (Optional[bool]):
Whether training should stop after the first iteration. Default to True.
min_rel_progress: Optional[float]
min_rel_progress (Optional[float]):
Minimum relative loss improvement necessary to continue training when early_stop is set to True. Default to 0.01.
enable_global_explain: Optional[bool]
enable_global_explain (Optional[bool]):
Whether to compute global explanations using explainable AI to evaluate global feature importance to the model. Default to False.
xgboost_version: Optional[str]
xgboost_version (Optional[str]):
Specifies the Xgboost version for model training. Default to "0.9". Possible values: "0.9", "1.1".
"""

0 comments on commit c2829e3

Please sign in to comment.