Changes to the stored data and its format affect how the information is processed and stored. Update methods were therefore created to migrate the existing storage to the new format.
import ipywidgets as widgets
from IPython.core.display import display, HTML, update_display
import json, os, pickle
from random import seed, randint
from tweet_requester.analysis import TweetAnalyzer
from tweet_requester.display import TweetInteractiveClassifier, \
JsonLInteractiveClassifier, TSess, prepare_google_credentials, PROCESSING_STAGES, logging
from twitter_secrets import C_BEARER_TOKEN
# Input data set (JSON Lines) and the working directory used for evaluations.
JL_DATA = "./tweetsRickyRenuncia-final.jsonl"
BASE_DIR = "./Evaluating Content"

# --- Database update parameters ---------------------------------------------
# On April 30, 2021 the RR team rehydrated their data with twarc.
# The value is a Unix timestamp: 1619755200 == 2021-04-30 04:00:00 UTC
# (midnight at UTC-4; presumably Puerto Rico local time -- confirm).
april302021 = 1619755200.0

# Commit hash recorded alongside the database updates.
# Previously used commit: 9219b7a01ce28f5bc0d61c913b3f914f967614fd
git_commit = "2ac78595cceef98a56c518c24f2187360e1527e3"
# Session object used to request tweets, authenticated with the bearer token
# from twitter_secrets.
# NOTE(review): the exact meaning of compression_level / sleep_time /
# hash_split is defined by TSess and is not visible here -- confirm there.
tweet_session = TSess(
    C_BEARER_TOKEN,
    compression_level=5,
    sleep_time=3,
    cache_dir="./tweet_cache/",
    hash_split=True,
)

# Credentials for the Google translation service, read from a local key file.
google_credentials = prepare_google_credentials(
    credentials_file="./google_translate_keys.json",
)

# Interactive classifier over the tweet-id list, backed by the SQLite file
# "tweets.db".
classifier = JsonLInteractiveClassifier(
    tweet_ids_file="tweetsRickyRenuncia-final.txt",
    session=tweet_session,
    mute=True,
    google_credentials=google_credentials,
    pre_initialized=True,
    sqlite_db="tweets.db",
)

# Closed right after construction.
# NOTE(review): presumably releases the database handle opened by the
# constructor so that the following steps start from a closed state -- confirm.
classifier.close()
import logging

# Surface warnings (and anything more severe) emitted while migrating.
logging.basicConfig(level=logging.WARNING)

# Apply the storage migrations in version order: 0.1 -> 0.2 -> 0.3 -> 0.4.
# The recorded output shows that a step logs a warning and is skipped when the
# database is already at a newer version.
classifier.update_database_v01_v02(
    dateCreated=april302021,
    git_commit=git_commit,
)
classifier.update_database_v02_v03(git_commit=git_commit)
classifier.update_database_v03_v04(git_commit=git_commit)
WARNING:root:Database version is 0.3 >= 0.2. Skipping update. WARNING:root:Database version is greater than expected 0.3 > 0.2. This update does not apply.
# Report how many tweets are in each processing stage, then print a small
# sample of the rows currently in the PREPROCESSED stage.

# Count of tweets per state, ordered by the numeric state value.
STATE_COUNTS_SQL = """
SELECT state, count(*) from tweet
GROUP BY state ORDER BY state;"""

# All columns of the tweets whose state matches the bound parameter.
# NOTE(review): the self-subquery looks redundant if tweet_id is unique in
# `tweet`; left unchanged because the schema is not visible here.
STATE_ROWS_SQL = """
SELECT * from tweet
WHERE tweet_id in (
SELECT tweet_id FROM tweet
WHERE state in (?));"""

SAMPLE_SIZE = 5  # number of sample rows to display
TABLE_ROW = "{:>25} | {:<8}"  # right-aligned stage name, left-aligned count

classifier.connect()
cur = classifier.cursor()
try:
    cur.execute(STATE_COUNTS_SQL)
    rows = cur.fetchall()

    print(TABLE_ROW.format("PROCESSING_STAGE", "COUNT"))
    print(TABLE_ROW.format("-" * 25, "-" * 8))
    for row in rows:
        # row[0] is the stored numeric state; map it back to its enum name.
        print(TABLE_ROW.format(PROCESSING_STAGES(row[0]).name, row[1]))

    cur.execute(STATE_ROWS_SQL, (PROCESSING_STAGES.PREPROCESSED.value,))
    # Only SAMPLE_SIZE rows are shown, so fetch just those instead of
    # materialising every PREPROCESSED row.
    # NOTE(review): assumes the cursor follows the DB-API (fetchmany).
    rows_sample = cur.fetchmany(SAMPLE_SIZE)
finally:
    # Release the cursor even if one of the queries fails.
    cur.close()

print("\n\nSample: ")
for row in rows_sample:
    print("\t", row)
PROCESSING_STAGE | COUNT
------------------------- | --------
UNPROCESSED | 493031
REVIEWING | 373
FINALIZED | 68
UNAVAILABLE_EMBEDING | 1328
RETWEET | 2814
PREPROCESSED | 2714
Sample:
('1002186716046864386', 6)
('1102716035176775681', 6)
('1138785914757533696', 6)
('1148321742697504769', 6)
('1149490876592218113', 6)
# Render one page of accepted tweets: page 3, three tweets per page.
# NOTE(review): whether `page` is 0- or 1-based is not visible here -- confirm.
classifier.display_accepted(page=3, per_page=3)
“Lárgate para el carajo pa’ la China o pa’ el Japón” 😂😂😂#RickyRenuncia pic.twitter.com/anixxn5PUO
— JunyPR3 (@herreraevanol) July 15, 2019
Translation to 'en':
"Get off to hell for China or for Japan" 😂😂😂 #RickyRenuncia https://t.co/anixxn5PUO
#RickyRenuncia TRENDING a nivel MUNDIAL. ¡No te quites boricua!🇵🇷✊🏼 pic.twitter.com/9oZopQC89y
— Fulanita (@YanieYampier) July 15, 2019
Translation to 'en':
#RickyRenuncia TRENDING WORLDWIDE. Don't take off boricua! 🇵🇷✊🏼 https://t.co/9oZopQC89y
#RickyRenuncia #PUTA
— PJ Sin Suela (@pjsinsuela) July 15, 2019
Canción completa: https://t.co/iqqykDyibz pic.twitter.com/U1mtZeNwzp
Translation to 'en':
#RickyRenuncia #PUTA Full song: https://t.co/iqqykDyibz https://t.co/U1mtZeNwzp
# Start the evaluation workflow of the classifier.
# NOTE(review): presumably opens the interactive classification widget in the
# notebook -- confirm against JsonLInteractiveClassifier.
classifier.StartEvaluations()
# Move up to RESET_LIMIT tweets that are stuck in the REVIEWING stage back to
# UNPROCESSED, printing each row as it is reset.

# All columns of the tweets whose state matches the bound parameter.
REVIEWING_ROWS_SQL = """
SELECT * from tweet
WHERE tweet_id in (
SELECT tweet_id FROM tweet
WHERE state in (?));"""

RESET_LIMIT = 10  # maximum number of tweets reset per run of this cell

classifier.connect()
cur = classifier.cursor()
try:
    cur.execute(REVIEWING_ROWS_SQL, (PROCESSING_STAGES.REVIEWING.value,))
    # Only RESET_LIMIT rows are processed, so do not load the whole result set.
    # NOTE(review): assumes the cursor follows the DB-API (fetchmany).
    rows = cur.fetchmany(RESET_LIMIT)
finally:
    # Release the cursor even if the query fails; the state changes below go
    # through the classifier, not through this cursor.
    cur.close()

for row in rows:
    print(row)
    # row[0] is the tweet id (first column of `tweet`, as in the printed rows).
    classifier.tweet_set_state(
        tweet_id=row[0],
        state=PROCESSING_STAGES.UNPROCESSED,
    )
('1150839690184069127', 1)
('1150842425390317573', 1)
('1150848287299244036', 1)
('1150849765376241664', 1)
('1150850130704248833', 1)
('1150857492173398018', 1)
('1150859636851040256', 1)
('1150862160760909824', 1)
('1150862260237164545', 1)
('1150863149429592064', 1)
# Paging parameters for the accepted-tweets view; edit and re-run the cell to
# browse other pages.
# NOTE(review): whether `page` is 0- or 1-based is not visible here -- confirm.
page=5
per_page=5
classifier.display_accepted(page=page, per_page=per_page)
MAÑANA todoooosss para SJ!!!! Y si no puedes ir explota tus redes con #RickyRenuncia, no se queden callados. Esto es cuestión de todos poner un granito de arena, dejen las excusas y APOYEN. pic.twitter.com/S9Jtje2KO4
— Ro (@rfxvi) July 16, 2019
Translation to 'en':
TOMORROW todoooosss for SJ !!!! And if you can't go exploit your networks with #RickyRenuncia , do not be silent. This is a matter for everyone to put a grain of sand, leave the excuses and SUPPORT. https://t.co/S9Jtje2KO4
Las paredes ya están pintadas, el país sigue jodío y Ricky no ha renunciado #RickyRenuncia pic.twitter.com/34VfA3qQ6Z
— Edgo (@edgo787) July 16, 2019
Translation to 'en':
The walls are already painted, the country is still screwed and Ricky has not given up #RickyRenuncia https://t.co/34VfA3qQ6Z
#RickyRenuncia https://t.co/hwPQSAbaCT
— Davinchi Almodovar (@eldavinchi) July 16, 2019
Translation to 'en':
#RickyRenuncia https://t.co/hwPQSAbaCT
I had 8 questions for the embattled Governor of Puerto Rico. Here’s how he responded. pic.twitter.com/7OwX5Ja5B8
— David Begnaud (@DavidBegnaud) July 16, 2019
Translation to 'en':
I had 8 questions for the embattled Governor of Puerto Rico. Here’s how he responded. https://t.co/7OwX5Ja5B8
El ambiente en Mayagüez ahora mismo. #RickyRenunciaYa #RickyRenuncia pic.twitter.com/NLKDvQ5tzO
— Mulata (@NataliaNicole) July 16, 2019
Translation to 'en':
The atmosphere in Mayagüez right now. #RickyRenunciaYa #RickyRenuncia https://t.co/NLKDvQ5tzO
from datetime import datetime
from time import sleep
import logging

# Preprocess tweets in batches of 150, leaving at least PULL_INTERVAL seconds
# between batches. The loop runs until a batch raises, at which point the
# error is logged and the loop stops.

# Minimum spacing between two batches, in seconds (15 minutes).
# NOTE(review): presumably chosen to match the Twitter API rate-limit
# window -- confirm.
PULL_INTERVAL = 900

# Back-date the "last pull" so the first batch starts immediately.
last_pull = datetime.now().timestamp() - PULL_INTERVAL
# NOTE(review): `end` is never read in this cell; kept in case another cell
# relies on it.
current_time = end = datetime.now().timestamp()
while True:
    # Sleep for whatever is left of the interval. If the interval has already
    # elapsed (first iteration, or a batch that ran long) the remainder is
    # not positive and we skip sleeping: sleep() rejects negative durations
    # with ValueError, which would otherwise kill the loop.
    remaining = PULL_INTERVAL - (current_time - last_pull)
    if remaining > 0:
        sleep(remaining)

    start_pull = datetime.now().timestamp()
    try:
        classifier.preprocess_batch(n=150)
    except Exception as err:
        # Top-level boundary of a long-running cell: record the failure with
        # its traceback and stop instead of retrying blindly.
        logging.exception("preprocess_batch failed; stopping: %s", err)
        break

    # Average the download time to the middle of the transaction.
    last_pull = (start_pull + datetime.now().timestamp()) / 2.0
    current_time = datetime.now().timestamp()
Preprocessed 94
# Run a single preprocessing batch outside the polling loop.
# NOTE(review): `n` is presumably the maximum number of tweets handled in the
# batch -- confirm against preprocess_batch.
classifier.preprocess_batch(n=250)
Preprocessed 250
# Install the tweet-requester package into the environment of the running
# Jupyter kernel. `sys.executable` is the interpreter the kernel itself runs
# on, so pip targets that environment rather than whichever `pip` happens to
# be first on PATH. The leading `!` is IPython shell-escape syntax.
# NOTE(review): this cell appears after cells that already import
# tweet_requester; on a fresh environment it has to be run first.
import sys
!{sys.executable} -m pip install tweet-requester
Collecting tweet-requester
Downloading tweet_requester-0.0.1-py3-none-any.whl (21 kB)
Collecting google-cloud-translate>=3.3.1
Downloading google_cloud_translate-3.3.2-py2.py3-none-any.whl (104 kB)
|████████████████████████████████| 104 kB 367 kB/s eta 0:00:01
Collecting ipython>=7.25.0
Downloading ipython-7.26.0-py3-none-any.whl (786 kB)
|████████████████████████████████| 786 kB 443 kB/s eta 0:00:01
Requirement already satisfied: proto-plus>=0.4.0 in /home/torrien/.virtualenvs/jupyterhub/lib/python3.8/site-packages (from google-cloud-translate>=3.3.1->tweet-requester) (1.19.0)
Requirement already satisfied: google-cloud-core<3.0.0dev,>=1.3.0 in /home/torrien/.virtualenvs/jupyterhub/lib/python3.8/site-packages (from google-cloud-translate>=3.3.1->tweet-requester) (1.7.1)
Requirement already satisfied: google-api-core[grpc]<3.0.0dev,>=1.26.0 in /home/torrien/.virtualenvs/jupyterhub/lib/python3.8/site-packages (from google-cloud-translate>=3.3.1->tweet-requester) (1.31.0)
Requirement already satisfied: packaging>=14.3 in /home/torrien/.virtualenvs/jupyterhub/lib/python3.8/site-packages (from google-cloud-translate>=3.3.1->tweet-requester) (20.9)
Requirement already satisfied: requests<3.0.0dev,>=2.18.0 in /home/torrien/.virtualenvs/jupyterhub/lib/python3.8/site-packages (from google-api-core[grpc]<3.0.0dev,>=1.26.0->google-cloud-translate>=3.3.1->tweet-requester) (2.25.1)
Requirement already satisfied: setuptools>=40.3.0 in /home/torrien/.virtualenvs/jupyterhub/lib/python3.8/site-packages (from google-api-core[grpc]<3.0.0dev,>=1.26.0->google-cloud-translate>=3.3.1->tweet-requester) (57.4.0)
Requirement already satisfied: pytz in /home/torrien/.virtualenvs/jupyterhub/lib/python3.8/site-packages (from google-api-core[grpc]<3.0.0dev,>=1.26.0->google-cloud-translate>=3.3.1->tweet-requester) (2021.1)
Requirement already satisfied: googleapis-common-protos<2.0dev,>=1.6.0 in /home/torrien/.virtualenvs/jupyterhub/lib/python3.8/site-packages (from google-api-core[grpc]<3.0.0dev,>=1.26.0->google-cloud-translate>=3.3.1->tweet-requester) (1.53.0)
Requirement already satisfied: protobuf>=3.12.0 in /home/torrien/.virtualenvs/jupyterhub/lib/python3.8/site-packages (from google-api-core[grpc]<3.0.0dev,>=1.26.0->google-cloud-translate>=3.3.1->tweet-requester) (3.17.3)
Requirement already satisfied: google-auth<2.0dev,>=1.25.0 in /home/torrien/.virtualenvs/jupyterhub/lib/python3.8/site-packages (from google-api-core[grpc]<3.0.0dev,>=1.26.0->google-cloud-translate>=3.3.1->tweet-requester) (1.33.0)
Requirement already satisfied: six>=1.13.0 in /home/torrien/.virtualenvs/jupyterhub/lib/python3.8/site-packages (from google-api-core[grpc]<3.0.0dev,>=1.26.0->google-cloud-translate>=3.3.1->tweet-requester) (1.15.0)
Requirement already satisfied: grpcio<2.0dev,>=1.29.0 in /home/torrien/.virtualenvs/jupyterhub/lib/python3.8/site-packages (from google-api-core[grpc]<3.0.0dev,>=1.26.0->google-cloud-translate>=3.3.1->tweet-requester) (1.38.1)
Requirement already satisfied: rsa<5,>=3.1.4 in /home/torrien/.virtualenvs/jupyterhub/lib/python3.8/site-packages (from google-auth<2.0dev,>=1.25.0->google-api-core[grpc]<3.0.0dev,>=1.26.0->google-cloud-translate>=3.3.1->tweet-requester) (4.7.2)
Requirement already satisfied: pyasn1-modules>=0.2.1 in /home/torrien/.virtualenvs/jupyterhub/lib/python3.8/site-packages (from google-auth<2.0dev,>=1.25.0->google-api-core[grpc]<3.0.0dev,>=1.26.0->google-cloud-translate>=3.3.1->tweet-requester) (0.2.8)
Requirement already satisfied: cachetools<5.0,>=2.0.0 in /home/torrien/.virtualenvs/jupyterhub/lib/python3.8/site-packages (from google-auth<2.0dev,>=1.25.0->google-api-core[grpc]<3.0.0dev,>=1.26.0->google-cloud-translate>=3.3.1->tweet-requester) (4.2.2)
Requirement already satisfied: pexpect>4.3 in /home/torrien/.virtualenvs/jupyterhub/lib/python3.8/site-packages (from ipython>=7.25.0->tweet-requester) (4.8.0)
Requirement already satisfied: backcall in /home/torrien/.virtualenvs/jupyterhub/lib/python3.8/site-packages (from ipython>=7.25.0->tweet-requester) (0.2.0)
Requirement already satisfied: prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0 in /home/torrien/.virtualenvs/jupyterhub/lib/python3.8/site-packages (from ipython>=7.25.0->tweet-requester) (3.0.14)
Requirement already satisfied: traitlets>=4.2 in /home/torrien/.virtualenvs/jupyterhub/lib/python3.8/site-packages (from ipython>=7.25.0->tweet-requester) (5.0.5)
Requirement already satisfied: jedi>=0.16 in /home/torrien/.virtualenvs/jupyterhub/lib/python3.8/site-packages (from ipython>=7.25.0->tweet-requester) (0.18.0)
Requirement already satisfied: decorator in /home/torrien/.virtualenvs/jupyterhub/lib/python3.8/site-packages (from ipython>=7.25.0->tweet-requester) (4.4.2)
Collecting matplotlib-inline
Using cached matplotlib_inline-0.1.2-py3-none-any.whl (8.2 kB)
Requirement already satisfied: pickleshare in /home/torrien/.virtualenvs/jupyterhub/lib/python3.8/site-packages (from ipython>=7.25.0->tweet-requester) (0.7.5)
Requirement already satisfied: pygments in /home/torrien/.virtualenvs/jupyterhub/lib/python3.8/site-packages (from ipython>=7.25.0->tweet-requester) (2.7.4)
Requirement already satisfied: parso<0.9.0,>=0.8.0 in /home/torrien/.virtualenvs/jupyterhub/lib/python3.8/site-packages (from jedi>=0.16->ipython>=7.25.0->tweet-requester) (0.8.1)
Requirement already satisfied: pyparsing>=2.0.2 in /home/torrien/.virtualenvs/jupyterhub/lib/python3.8/site-packages (from packaging>=14.3->google-cloud-translate>=3.3.1->tweet-requester) (2.4.7)
Requirement already satisfied: ptyprocess>=0.5 in /home/torrien/.virtualenvs/jupyterhub/lib/python3.8/site-packages (from pexpect>4.3->ipython>=7.25.0->tweet-requester) (0.7.0)
Requirement already satisfied: wcwidth in /home/torrien/.virtualenvs/jupyterhub/lib/python3.8/site-packages (from prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0->ipython>=7.25.0->tweet-requester) (0.2.5)
Requirement already satisfied: pyasn1<0.5.0,>=0.4.6 in /home/torrien/.virtualenvs/jupyterhub/lib/python3.8/site-packages (from pyasn1-modules>=0.2.1->google-auth<2.0dev,>=1.25.0->google-api-core[grpc]<3.0.0dev,>=1.26.0->google-cloud-translate>=3.3.1->tweet-requester) (0.4.8)
Requirement already satisfied: idna<3,>=2.5 in /home/torrien/.virtualenvs/jupyterhub/lib/python3.8/site-packages (from requests<3.0.0dev,>=2.18.0->google-api-core[grpc]<3.0.0dev,>=1.26.0->google-cloud-translate>=3.3.1->tweet-requester) (2.10)
Requirement already satisfied: certifi>=2017.4.17 in /home/torrien/.virtualenvs/jupyterhub/lib/python3.8/site-packages (from requests<3.0.0dev,>=2.18.0->google-api-core[grpc]<3.0.0dev,>=1.26.0->google-cloud-translate>=3.3.1->tweet-requester) (2020.12.5)
Requirement already satisfied: urllib3<1.27,>=1.21.1 in /home/torrien/.virtualenvs/jupyterhub/lib/python3.8/site-packages (from requests<3.0.0dev,>=2.18.0->google-api-core[grpc]<3.0.0dev,>=1.26.0->google-cloud-translate>=3.3.1->tweet-requester) (1.26.3)
Requirement already satisfied: chardet<5,>=3.0.2 in /home/torrien/.virtualenvs/jupyterhub/lib/python3.8/site-packages (from requests<3.0.0dev,>=2.18.0->google-api-core[grpc]<3.0.0dev,>=1.26.0->google-cloud-translate>=3.3.1->tweet-requester) (3.0.4)
Requirement already satisfied: ipython-genutils in /home/torrien/.virtualenvs/jupyterhub/lib/python3.8/site-packages (from traitlets>=4.2->ipython>=7.25.0->tweet-requester) (0.2.0)
Installing collected packages: matplotlib-inline, ipython, google-cloud-translate, tweet-requester
Attempting uninstall: ipython
Found existing installation: ipython 7.20.0
Uninstalling ipython-7.20.0:
Successfully uninstalled ipython-7.20.0
Attempting uninstall: google-cloud-translate
Found existing installation: google-cloud-translate 3.2.1
Uninstalling google-cloud-translate-3.2.1:
Successfully uninstalled google-cloud-translate-3.2.1
Successfully installed google-cloud-translate-3.3.2 ipython-7.26.0 matplotlib-inline-0.1.2 tweet-requester-0.0.1
WARNING: You are using pip version 21.2.1; however, version 21.2.2 is available.
You should consider upgrading via the '/home/torrien/.virtualenvs/jupyterhub/bin/python -m pip install --upgrade pip' command.