Spaces:
Runtime error
Runtime error
Commit
·
27df543
1
Parent(s):
eb37af6
Push to HF space
Browse files- app.py +281 -0
- requirements.txt +0 -0
app.py
ADDED
|
@@ -0,0 +1,281 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Streamlit UI."""
|
| 2 |
+
|
| 3 |
+
from typing import Literal, Optional, Union
|
| 4 |
+
|
| 5 |
+
import lilac as ll
|
| 6 |
+
import streamlit as st
|
| 7 |
+
from datasets import load_dataset_builder
|
| 8 |
+
|
| 9 |
+
# Widen the main block container and the expanded sidebar via injected CSS.
_PAGE_STYLE = """
<style>
.block-container {
max-width: 54rem;
}
[data-testid="stSidebar"][aria-expanded="true"]{
min-width: 600px;
}
</style>
"""
st.markdown(_PAGE_STYLE, unsafe_allow_html=True)
|
| 23 |
+
|
| 24 |
+
# The app is a three-page wizard: choose a dataset, choose the space settings,
# and a success page that is shown once a deployment has completed.
PAGES = ['dataset', 'space', 'success']
if 'current_page' not in st.session_state:
    # First run of a session: start on the dataset page.
    st.session_state.current_page = 'dataset'


# Module-level default: no dataset has been validated yet.
is_valid_dataset = False
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def _get_page() -> str:
    """Return the page name stored in `st.session_state.current_page`."""
    active_page = st.session_state.current_page
    return active_page
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def _dataset_page():
    """Render step 1: collect the HuggingFace dataset coordinates and validate them.

    Draws the dataset, config, split, sample-size and read-token inputs, tries to
    resolve the dataset with `load_dataset_builder`, and records the outcome in
    `st.session_state` (`ds_loaded`, `ds_error`, `ds_dataset_name`,
    `ds_description`, `ds_features`, `ds_splits`). The "Next" button is enabled
    only when the builder loaded; clicking it saves the form values and switches
    `current_page` to `'space'`.
    """
    st.header('Deploy a HuggingFace dataset to a space in Lilac 🌸', anchor=False)
    st.subheader(
        'Step 1: Choose a dataset',
        divider='violet',
        anchor=False,
        help='For a list of datasets see: https://huggingface.co/datasets',
    )

    # Every input is pre-filled from session state so values survive a
    # round-trip to the space page and back.
    hf_dataset_name = st.text_input(
        'HuggingFace dataset',
        help='Either in the format `user/dataset` or `dataset`, for example: `Open-Orca/OpenOrca`',
        placeholder='dataset or user/dataset',
        value=st.session_state.get('hf_dataset_name', None),
    )
    hf_config_name = st.text_input(
        'Config',
        help='Some datasets required this field.',
        placeholder='(optional)',
        value=st.session_state.get('hf_config_name', None),
    )
    hf_split = st.text_input(
        'Split',
        help='Loads all splits by default.',
        placeholder='(optional)',
        value=st.session_state.get('hf_split', None),
    )
    sample_size = st.number_input(
        'Sample size',
        help='Number of rows to sample from the dataset, for each split.',
        placeholder='(optional)',
        min_value=1,
        step=1,
        key='sample_size',
        value=st.session_state.get('sample_size', None),
    )
    hf_read_token = st.text_input(
        'HuggingFace read access token',
        type='password',
        help='The access token is used to authenticate you with HuggingFace to read the dataset. '
        'https://huggingface.co/docs/hub/security-tokens',
        placeholder='(optional if dataset is public)',
    )

    def _next():
        """Persist the form values and advance to the space-settings page."""
        st.session_state.current_page = 'space'
        st.session_state.hf_dataset_name = hf_dataset_name
        st.session_state.hf_config_name = hf_config_name
        st.session_state.hf_split = hf_split
        st.session_state.sample_size = sample_size

    ds_builder = None
    is_valid_dataset = False
    if hf_dataset_name:
        try:
            # Normalize blank values to None: an empty string would otherwise be
            # passed on as a literal config name / token.
            ds_builder = load_dataset_builder(
                hf_dataset_name,
                name=hf_config_name or None,
                token=hf_read_token or None,
            )
            is_valid_dataset = True
        except Exception as e:
            # UI boundary: keep the error so the sidebar can display it.
            st.session_state.ds_error = e
            st.session_state.ds_loaded = False

        st.session_state.hf_dataset_name = hf_dataset_name
    else:
        # Nothing entered: drop any error left over from an earlier attempt so
        # the sidebar does not report a failure for a dataset no longer typed in.
        st.session_state.ds_error = None

    st.button('Next', disabled=not is_valid_dataset, type='primary', on_click=_next)

    if ds_builder is not None:
        # Cache the builder metadata that the sidebar renders.
        st.session_state.ds_loaded = True
        st.session_state.ds_error = None
        st.session_state.ds_dataset_name = ds_builder.info.dataset_name
        st.session_state.ds_description = ds_builder.info.description
        st.session_state.ds_features = ds_builder.info.features
        st.session_state.ds_splits = ds_builder.info.splits
    else:
        st.session_state.ds_loaded = False
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
def _space_page():
    """Render step 2: collect HuggingFace Space settings and deploy the dataset.

    Shows a Back button, a summary of the optional dataset choices, inputs for
    the space name, write access token and persistent storage tier, and a
    Deploy button. The Deploy callback calls `ll.deploy_config`; on success it
    stores the returned link in `st.session_state.space_link` and switches
    `current_page` to `'success'`, on failure it renders the error.
    """
    # Restore whatever `_back` saved on a previous visit to this page.
    hf_space_name = st.session_state.get('hf_space_name', None)
    hf_storage = st.session_state.get('hf_storage', None)
    hf_access_token = st.session_state.get('hf_access_token', None)

    def _back():
        """Save the current form values and return to the dataset page."""
        st.session_state.hf_space_name = hf_space_name
        st.session_state.hf_storage = hf_storage
        st.session_state.hf_access_token = hf_access_token
        st.session_state.current_page = 'dataset'

    st.button('⬅ Back', on_click=_back)
    st.subheader(
        'Step 2: Choose HuggingFace Space settings',
        divider='violet',
        anchor=False,
        help='See HuggingFace Spaces [documentation](https://huggingface.co/docs/hub/spaces-overview)',
    )

    # Summarize the optional choices made on the dataset page. `.get` keeps the
    # page rendering even if a key was never stored.
    chosen_config = st.session_state.get('hf_config_name', None)
    if chosen_config:
        st.write(f'Config: {chosen_config}')
    chosen_split = st.session_state.get('hf_split', None)
    if chosen_split:
        st.write(f'Split: {chosen_split}')
    chosen_sample_size = st.session_state.get('sample_size', None)
    if chosen_sample_size:
        st.write(f'Sample size: {chosen_sample_size}')

    st.write('##### HuggingFace space to create')
    hf_space_name = st.text_input(
        'HuggingFace space name',
        help='This space will be created if it does not exist',
        placeholder='org/name',
        value=hf_space_name,
    )
    hf_access_token = st.text_input(
        'HuggingFace write access token',
        type='password',
        help='The access token is used to authenticate you with HuggingFace to create the space. '
        'https://huggingface.co/docs/hub/security-tokens',
        value=hf_access_token,
    )
    storage_options = ['None', 'small', 'medium', 'large']
    # Fall back to the first option ('None') when nothing valid was saved.
    storage_index = storage_options.index(hf_storage) if hf_storage in storage_options else 0
    hf_storage = st.selectbox(
        'Persistent storage',
        storage_options,
        help='You will get charged for persistent storage. See https://huggingface.co/docs/hub/spaces-storage',
        index=storage_index,
    )
    # TODO(review): a 'Make space public' checkbox was sketched here but never wired up.

    def _deploy():
        """Deploy the chosen dataset to the space via `ll.deploy_config`."""
        hf_dataset_name = st.session_state.get('hf_dataset_name', None)
        if not (hf_space_name and hf_access_token and hf_dataset_name):
            # Explicit check instead of `assert`, which is stripped under `-O`.
            st.subheader('Deployment failed!', divider='red')
            st.error('A dataset, a space name and a write access token are all required.')
            return

        # Blank strings are treated as "not provided".
        hf_config_name = st.session_state.get('hf_config_name', None) or None
        hf_split = st.session_state.get('hf_split', None) or None
        sample_size = st.session_state.get('sample_size', None)

        hf_space_storage: Optional[Literal['small', 'medium', 'large']]
        if hf_storage == 'None':
            hf_space_storage = None
        else:
            # Narrows the selectbox value to the literal tiers for the type checker.
            assert hf_storage == 'small' or hf_storage == 'medium' or hf_storage == 'large'
            hf_space_storage = hf_storage

        try:
            space_link = ll.deploy_config(
                hf_space=hf_space_name,
                create_space=True,
                hf_space_storage=hf_space_storage,
                config=ll.Config(
                    datasets=[
                        ll.DatasetConfig(
                            namespace='local',
                            # '/' in `user/dataset` is replaced to form the local dataset name.
                            name=hf_dataset_name.replace('/', '_'),
                            source=ll.HuggingFaceSource(
                                dataset_name=hf_dataset_name,
                                config_name=hf_config_name,
                                split=hf_split,
                                sample_size=int(sample_size) if sample_size else None,
                                # NOTE(review): the write token is also handed to the dataset
                                # source; confirm it is not persisted in the deployed config.
                                token=hf_access_token,
                            ),
                        )
                    ]
                ),
                hf_token=hf_access_token,
            )
        except Exception as e:
            # UI boundary: surface any deployment failure to the user.
            st.subheader('Deployment failed!', divider='red')
            st.error(e)
        else:
            st.session_state.space_link = space_link
            st.session_state.current_page = 'success'

    # Deploying needs both a space name and a token.
    can_deploy = bool(hf_access_token and hf_space_name)
    st.button('Deploy', disabled=not can_deploy, on_click=_deploy)
|
| 231 |
+
|
| 232 |
+
|
| 233 |
+
def _success_page():
    """Render the final page with links to the deployed space and its settings.

    Reads the link from `st.session_state.space_link`, which the deploy
    callback stores after a successful deployment.
    """
    space_link = st.session_state.space_link

    st.subheader('Success!', divider='green')
    st.subheader(f'[Visit your HuggingFace space ↗]({space_link})')
    st.write(
        'Spaces are private by default. '
        f'To make them public, visit the [Space settings]({space_link}/settings). '
    )
|
| 243 |
+
|
| 244 |
+
|
| 245 |
+
# Render whichever page the session is currently on; an unknown page name
# renders nothing.
_page_renderers = {
    'dataset': _dataset_page,
    'space': _space_page,
    'success': _success_page,
}
_render_page = _page_renderers.get(_get_page())
if _render_page is not None:
    _render_page()
|
| 251 |
+
|
| 252 |
+
|
| 253 |
+
# Sidebar: describe the loaded dataset, or explain why it could not be loaded.
_repo_id = st.session_state.get('hf_dataset_name', None)
dataset_name = st.session_state.get('ds_dataset_name', None) or _repo_id
if st.session_state.get('ds_loaded', False):
    st.sidebar.write('# HuggingFace dataset')

    # Link with the repo id the user typed (`user/dataset`); the builder's own
    # name may omit the namespace and would then point at the wrong Hub page.
    st.sidebar.header(
        f'[{dataset_name}](https://huggingface.co/datasets/{_repo_id or dataset_name})',
        divider='rainbow',
        anchor=False,
    )

    st.sidebar.write(st.session_state.get('ds_description', None))

    st.sidebar.write('##### Features')
    st.sidebar.table(st.session_state.get('ds_features', {}))

    st.sidebar.write('##### Splits')
    st.sidebar.table(st.session_state.get('ds_splits', {}))
elif st.session_state.get('ds_error', None):
    st.sidebar.subheader(f'Error loading `{dataset_name}`', divider='red', anchor=False)
    st.sidebar.error(st.session_state.get('ds_error', None))
    st.sidebar.write(
        'If the dataset is private, make sure to enter a HuggingFace '
        'token that has access to the dataset.'
    )
else:
    st.sidebar.write('Choose a dataset to see more info..')
|
requirements.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|