In this project, we will build an interactive web-app data dashboard using the Streamlit library in Python. We will perform sentiment analysis on the Twitter US Airline dataset.
Streamlit is an open-source Python library that makes it easy to create and share beautiful, custom web apps for machine learning and data science. In just a few minutes you can build and deploy powerful data apps.
We are going to focus on three objectives:
1. Build interactive data dashboards with Streamlit and Python.
2. Use pandas for data manipulation in data science workflows.
3. Create interactive plots with Plotly Python.
So, our project will be divided into the following tasks:
Task 1: Install streamlit library
(i) Make sure that you have Python 3.6 or greater installed.
(ii) Install Streamlit using pip:
pip install streamlit
(iii) Run the hello world app:
streamlit hello
(iv) In the next few seconds, the sample app will open in a new tab in your default browser.
Task 2: Import packages and Libraries
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from wordcloud import WordCloud, STOPWORDS
import matplotlib.pyplot as plt
Task 3: Load the Twitter US Airline Sentiment Data
# Location of the tweets CSV handed to pd.read_csv below.
DATA_URL = "Tweets.csv"

# NOTE(review): the trailing "????" looks like a mis-encoded character
# (possibly an emoji) — confirm the intended text before changing it.
APP_DESCRIPTION = (
    "This application is a Streamlit dashboard used "
    "to analyze sentiments of tweets ????"
)

# Page header and description, shown both in the main area and the sidebar.
st.title("Sentiment Analysis of Tweets about US Airlines")
st.sidebar.title("Sentiment Analysis of Tweets")
st.markdown(APP_DESCRIPTION)
st.sidebar.markdown(APP_DESCRIPTION)


# NOTE(review): st.cache is deprecated in newer Streamlit releases
# (st.cache_data is the replacement) — confirm against the installed version.
@st.cache(persist=True)
def load_data():
    """Read the CSV at DATA_URL and parse ``tweet_created`` as datetimes.

    Returns:
        The loaded DataFrame with ``tweet_created`` converted by
        ``pd.to_datetime``.
    """
    tweets = pd.read_csv(DATA_URL)
    tweets['tweet_created'] = pd.to_datetime(tweets['tweet_created'])
    return tweets


data = load_data()
Task 4: Display Tweets in the Sidebar
# Sidebar widget: pick a sentiment and show one randomly sampled tweet text.
st.sidebar.subheader("Show random tweet")
random_tweet = st.sidebar.radio('Sentiment', ('positive', 'neutral', 'negative'))

# "@random_tweet" makes DataFrame.query read the variable assigned above.
matching_text = data.query("airline_sentiment == @random_tweet")[["text"]]
sampled_row = matching_text.sample(n=1)
st.sidebar.markdown(sampled_row.iat[0, 0])
Task 5: Plot Interactive Bar Plots and Pie Charts
# Section: number of tweets per sentiment, as a bar plot or a pie chart.
st.sidebar.markdown("### Number of tweets by sentiment")

# Widget keys must be distinct across the whole app, so use a descriptive
# key instead of a short numeric one that is easy to reuse by accident.
select = st.sidebar.selectbox(
    'Visualization type',
    ['Bar plot', 'Pie chart'],
    key='sentiment_viz_type',
)

# Two-column frame (Sentiment, Tweets) in the shape plotly.express expects.
sentiment_count = data['airline_sentiment'].value_counts()
sentiment_count = pd.DataFrame(
    {'Sentiment': sentiment_count.index, 'Tweets': sentiment_count.values}
)

# The checkbox defaults to checked, i.e. the section starts hidden.
if not st.sidebar.checkbox("Hide", True):
    st.markdown("### Number of tweets by sentiment")
    if select == 'Bar plot':
        fig = px.bar(
            sentiment_count, x='Sentiment', y='Tweets', color='Tweets', height=500
        )
    else:
        fig = px.pie(sentiment_count, values='Tweets', names='Sentiment')
    st.plotly_chart(fig)
Task 6: Plotting Location Data on an Interactive Map
# Section: map of tweets created during a user-selected hour of the day.
st.sidebar.subheader("When and where are users tweeting from?")
hour = st.sidebar.slider("Hour to look at", 0, 23)

# Keep only rows whose tweet_created timestamp falls in the selected hour.
modified_data = data[data['tweet_created'].dt.hour == hour]

# Widget keys must be distinct across the whole app; a descriptive key avoids
# clashing with other widgets. The checkbox defaults to checked (section hidden).
if not st.sidebar.checkbox("Close", True, key='close_map'):
    st.markdown("### Tweet locations based on time of day")
    st.markdown(
        "%i tweets between %i:00 and %i:00"
        % (len(modified_data), hour, (hour + 1) % 24)  # % 24 wraps 23 -> 0
    )
    # NOTE(review): st.map presumably needs latitude/longitude columns in
    # the frame — confirm the CSV provides them.
    st.map(modified_data)
    if st.sidebar.checkbox("Show raw data", False):
        st.write(modified_data)
Task 7: Plot Number of Tweets by Sentiment for Each Airline
# Section: total number of tweets per airline, as a bar plot or a pie chart.
st.sidebar.subheader("Total number of tweets for each airline")

# Widget keys must be distinct across the whole app. The selectbox and the
# checkbox below each get their own descriptive key.
each_airline = st.sidebar.selectbox(
    'Visualization type',
    ['Bar plot', 'Pie chart'],
    key='airline_viz_type',
)

# Tweets per airline, largest first, as a two-column frame for plotly.express.
airline_sentiment_count = (
    data.groupby('airline')['airline_sentiment']
    .count()
    .sort_values(ascending=False)
)
airline_sentiment_count = pd.DataFrame(
    {
        'Airline': airline_sentiment_count.index,
        'Tweets': airline_sentiment_count.values,
    }
)

# The checkbox defaults to checked, i.e. the section starts hidden.
if not st.sidebar.checkbox("Close", True, key='close_airline_totals'):
    if each_airline == 'Bar plot':
        st.subheader("Total number of tweets for each airline")
        fig_1 = px.bar(
            airline_sentiment_count,
            x='Airline',
            y='Tweets',
            color='Tweets',
            height=500,
        )
        st.plotly_chart(fig_1)
    elif each_airline == 'Pie chart':
        st.subheader("Total number of tweets for each airline")
        fig_2 = px.pie(airline_sentiment_count, values='Tweets', names='Airline')
        st.plotly_chart(fig_2)
Task 8: Plotting Sentiment
# NOTE(review): st.cache is deprecated in newer Streamlit releases
# (st.cache_data is the replacement) — confirm against the installed version.
@st.cache(persist=True)
def plot_sentiment(airline):
    """Count tweets per sentiment for a single airline.

    Args:
        airline: Value matched against the ``airline`` column of ``data``.

    Returns:
        A DataFrame with columns ``Sentiment`` and ``Tweets``.
    """
    df = data[data['airline'] == airline]
    count = df['airline_sentiment'].value_counts()
    return pd.DataFrame({'Sentiment': count.index, 'Tweets': count.values})


# Section: per-airline sentiment breakdown, one subplot per selected airline.
st.sidebar.subheader("Breakdown airline by sentiment")
choice = st.sidebar.multiselect(
    'Pick airlines',
    ('US Airways', 'United', 'American', 'Southwest', 'Delta', 'Virgin America'),
)
if choice:
    st.subheader("Breakdown airline by sentiment")
    # Widget keys must be distinct across the whole app, so use a descriptive key.
    breakdown_type = st.sidebar.selectbox(
        'Visualization type',
        ['Pie chart', 'Bar plot'],
        key='breakdown_viz_type',
    )
    if breakdown_type == 'Bar plot':
        fig_3 = make_subplots(rows=1, cols=len(choice), subplot_titles=choice)
        for col, airline in enumerate(choice, start=1):
            counts = plot_sentiment(airline)  # computed once per airline
            fig_3.add_trace(
                go.Bar(x=counts.Sentiment, y=counts.Tweets, showlegend=False),
                row=1,
                col=col,
            )
    else:
        # Pie traces need 'domain'-type subplot cells instead of the default.
        fig_3 = make_subplots(
            rows=1,
            cols=len(choice),
            specs=[[{'type': 'domain'}] * len(choice)],
            subplot_titles=choice,
        )
        for col, airline in enumerate(choice, start=1):
            counts = plot_sentiment(airline)  # computed once per airline
            fig_3.add_trace(
                go.Pie(labels=counts.Sentiment, values=counts.Tweets, showlegend=True),
                row=1,
                col=col,
            )
    fig_3.update_layout(height=600, width=800)
    st.plotly_chart(fig_3)

# Second variant of the same breakdown: a single faceted histogram built with
# plotly.express. It has its own multiselect (distinguished by key=0).
st.sidebar.subheader("Breakdown airline by sentiment")
choice = st.sidebar.multiselect(
    'Pick airlines',
    ('US Airways', 'United', 'American', 'Southwest', 'Delta', 'Virgin America'),
    key=0,
)
if choice:
    choice_data = data[data.airline.isin(choice)]
    fig_0 = px.histogram(
        choice_data,
        x='airline',
        y='airline_sentiment',
        histfunc='count',
        color='airline_sentiment',
        facet_col='airline_sentiment',
        labels={'airline_sentiment': 'tweets'},
        height=600,
        width=800,
    )
    st.plotly_chart(fig_0)
Task 9: Word Cloud for Positive, Neutral, and Negative Tweets
# Section: word cloud built from the tweet texts of one sentiment class.
st.sidebar.header("Word Cloud")
word_sentiment = st.sidebar.radio(
    'Display word cloud for what sentiment?',
    ('positive', 'neutral', 'negative'),
)

# Widget keys must be distinct across the whole app, so use a descriptive key.
# The checkbox defaults to checked, i.e. the section starts hidden.
if not st.sidebar.checkbox("Close", True, key='close_wordcloud'):
    st.subheader('Word cloud for %s sentiment' % (word_sentiment))
    df = data[data['airline_sentiment'] == word_sentiment]
    words = ' '.join(df['text'])
    # Drop links, @-mentions and the retweet marker before building the cloud.
    processed_words = ' '.join(
        word
        for word in words.split()
        if 'http' not in word and not word.startswith('@') and word != 'RT'
    )
    wordcloud = WordCloud(
        stopwords=STOPWORDS, background_color='white', width=800, height=640
    ).generate(processed_words)

    # Draw on an explicit figure and pass it to st.pyplot; calling st.pyplot()
    # with no argument relies on the deprecated global pyplot figure.
    wordcloud_fig, wordcloud_ax = plt.subplots()
    wordcloud_ax.imshow(wordcloud)
    wordcloud_ax.set_xticks([])
    wordcloud_ax.set_yticks([])
    st.pyplot(wordcloud_fig)
Congratulations! We made it to the final step.
Now open the terminal and run this command.
streamlit run app.py
You will see the interactive dashboard app open in a new tab in your default browser.
Submitted by Madhav Sharma (Madhav)
Download packets of source code on Coders Packet
Comments