diff --git a/notebooks/5_optional_bert_classification_pipeline_huggingface.ipynb b/notebooks/5_optional_bert_classification_pipeline_huggingface.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..28cc990151470b3512fe3ed415a7f3f917426a7c
--- /dev/null
+++ b/notebooks/5_optional_bert_classification_pipeline_huggingface.ipynb
@@ -0,0 +1,133 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Setup and data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "c:\\Users\\ESenn\\Miniconda3\\envs\\llm_class\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+      "  from .autonotebook import tqdm as notebook_tqdm\n"
+     ]
+    }
+   ],
+   "source": [
+    "from transformers import BertTokenizer, BertForSequenceClassification\n",
+    "from transformers import pipeline"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# load your data here"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Transformers Library for Sentiment Classification using BERT"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Task:** Review classification\n",
+    "\n",
+    "**Model:** A pretrained BERT model with a sentiment classification head (which is not trained on our task).\n",
+    "\n",
+    "We use the hidden states of BERT for sentiment classification.\n",
+    "The classifier is not trained here. See the chapter 2 notebooks for training, and chapter 5 for a step-by-step description of how to train it."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "c:\\Users\\ESenn\\Miniconda3\\envs\\llm_class\\Lib\\site-packages\\transformers\\tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884\n",
+      "  warnings.warn(\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Text: This movie was fantastic! I loved it.\n",
+      "Sentiment: 5 stars\n",
+      "Text: The food was terrible, I will never come back.\n",
+      "Sentiment: 1 star\n",
+      "Text: The service was just okay, nothing special.\n",
+      "Sentiment: 3 stars\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Load pre-trained BERT tokenizer and model for sentiment classification\n",
+    "tokenizer = BertTokenizer.from_pretrained(\n",
+    "    \"nlptown/bert-base-multilingual-uncased-sentiment\"\n",
+    ")\n",
+    "model = BertForSequenceClassification.from_pretrained(\n",
+    "    \"nlptown/bert-base-multilingual-uncased-sentiment\"\n",
+    ")\n",
+    "\n",
+    "# Load the classification pipeline\n",
+    "classifier = pipeline(\"sentiment-analysis\", model=model, tokenizer=tokenizer)\n",
+    "\n",
+    "# Example texts for classification\n",
+    "texts = [\n",
+    "    \"This movie was fantastic! I loved it.\",\n",
+    "    \"The food was terrible, I will never come back.\",\n",
+    "    \"The service was just okay, nothing special.\",\n",
+    "]\n",
+    "\n",
+    "# Perform classification\n",
+    "results = classifier(texts)\n",
+    "\n",
+    "# Display the results\n",
+    "for text, result in zip(texts, results):\n",
+    "    print(f\"Text: {text}\")\n",
+    "    print(f\"Sentiment: {result['label']}\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "llm_class",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}