https://huggingface.co/princeton-nlp/Sheared-LLaMA-1.3B

from deepsparse import TextGeneration
# Build a DeepSparse text-generation pipeline from the model stub
# "hf:mgoin/Sheared-LLaMA-1.3B-ds".
# NOTE(review): the "hf:" prefix presumably tells DeepSparse to fetch the model
# from the Hugging Face Hub -- confirm against the DeepSparse documentation.
model = TextGeneration(model="hf:mgoin/Sheared-LLaMA-1.3B-ds")
# Example 1: generate a continuation for a prompt, requesting max_new_tokens=100.
# The call is a bare expression: its result is neither stored nor printed, so
# it is only displayed when this snippet is run in an interactive session
# (REPL / notebook). Use print(...) or assign the result when running as a script.
model("Say hello world in 10 languages:", max_new_tokens=100)
# Recorded sample output for the call above (captured 2023-10-17). Note
# finished_reason='length': generation stopped at a length limit (presumably
# the max_new_tokens budget) rather than ending on its own, and the text is
# visibly repetitive.
# TextGenerationOutput(created=datetime.datetime(2023, 10, 17, 22, 42, 52, 602506), prompts='Say hello world in 10 languages:', generations=[GeneratedText(text='\nThe world is a big place, and we want to make it a little bit smaller.\nWe’re building a platform that lets you say hello to the world in 10 languages.\nWe’re building a platform that lets you say hello to the world in 10 languages. We’re building a platform that lets you say hello to the world in 10 languages. We’re building a platform that lets you say hello to the world in 10 languages.', score=None, finished=True, finished_reason='length')])
# Example 2: same pipeline and token budget, different prompt. As above, the
# result is discarded unless the snippet is run interactively.
model("Here is a recipe for banana bread:", max_new_tokens=100)
# Recorded sample output for the call above (captured 2023-10-17); it also ends
# with finished_reason='length' and is cut off mid-sentence.
# TextGenerationOutput(created=datetime.datetime(2023, 10, 17, 22, 43, 57, 755015), prompts='Here is a recipe for banana bread:', generations=[GeneratedText(text='a simple, delicious, and healthy way to start your day.\nI’ve been making this banana bread for years. It’s a great way to start your day. It’s easy to make, and it’s delicious.\nI’ve made this banana bread for years. It’s a great way to start your day. It’s easy to make, and it’s delicious.\nI’ve made this banana bread for years', score=None, finished=True, finished_reason='length')])