#!/usr/bin/env python
# coding: utf-8

# # Update Database Structure
#
# Changes in the data stored and format will affect how the information is
# processed and stored. An update method was created to change the storage.

# In[1]:

import ipywidgets as widgets
from IPython.core.display import display, HTML, update_display
import json, os, pickle
from random import seed, randint
from tweet_requester.analysis import TweetAnalyzer
from tweet_requester.display import TweetInteractiveClassifier, \
    JsonLInteractiveClassifier, TSess, prepare_google_credentials, \
    PROCESSING_STAGES, logging
from twitter_secrets import C_BEARER_TOKEN

JL_DATA = "./tweetsRickyRenuncia-final.jsonl"
BASE_DIR = "./Evaluating Content"

# Update database
# April 30, 2021 the RR team rehydrated with twarc their data.
april302021 = 1619755200.0
# git_commit = "9219b7a01ce28f5bc0d61c913b3f914f967614fd"
git_commit = "2ac78595cceef98a56c518c24f2187360e1527e3"

# Cached, rate-limited Twitter API session used by the classifier below.
tweet_session = TSess(
    C_BEARER_TOKEN,
    compression_level=5,
    sleep_time=3,
    cache_dir="./tweet_cache/",
    hash_split=True,
)

google_credentials = prepare_google_credentials(
    credentials_file="./google_translate_keys.json"
)

# In[2]:

# Opens (pre-initialized) the sqlite-backed classifier over the hydrated tweets.
classifier = JsonLInteractiveClassifier(
    tweet_ids_file="tweetsRickyRenuncia-final.txt",
    session=tweet_session,
    mute=True,
    google_credentials=google_credentials,
    pre_initialized=True,
    sqlite_db="tweets.db",
)

# In[3]:

classifier.close()

# In[4]:

import logging

logging.basicConfig(level=logging.WARNING)
# Run the schema/storage migrations in order: v01->v02->v03->v04.
classifier.update_database_v01_v02(dateCreated=april302021, git_commit=git_commit)
classifier.update_database_v02_v03(git_commit=git_commit)
classifier.update_database_v03_v04(git_commit=git_commit)

# In[4]:

# Report how many tweets sit in each processing stage.
classifier.connect()
cur = classifier.cursor()
cur.execute("""
    SELECT state, count(*) from tweet GROUP BY state ORDER BY state;""")
rows = cur.fetchall()
print("{:>25} | {:<8}".format("PROCESSING_STAGE", "COUNT"))
print("{:>25} | {:<8}".format("-" * 25, "-" * 8))
for row in rows:
    # FIX: this format string was broken across two lines in the export
    # (a syntax error); restored to one line, mirroring the header format.
    print("{:>25} | {:<8}".format(PROCESSING_STAGES(row[0]).name, row[1]))

cur.execute(
    """
    SELECT * from tweet WHERE tweet_id in (
        SELECT tweet_id FROM tweet WHERE state in (?));""",
    (PROCESSING_STAGES.PREPROCESSED.value,),
)
rows_sample = cur.fetchall()
print("\n\nSample: ")
n = 0
cur.close()
# Show at most 5 sample rows from the PREPROCESSED stage.
for row in rows_sample:
    print("\t", row)
    n += 1
    if n > 4:
        break

# In[5]:

classifier.display_accepted(page=3, per_page=3)

# In[5]:

classifier.StartEvaluations()

# In[6]:

# Reset up to 10 tweets stuck in REVIEWING back to UNPROCESSED.
classifier.connect()
cur = classifier.cursor()
cur.execute(
    """
    SELECT * from tweet WHERE tweet_id in (
        SELECT tweet_id FROM tweet WHERE state in (?));""",
    (PROCESSING_STAGES.REVIEWING.value,),
)
rows = cur.fetchall()
n = 0
cur.close()
for row in rows:
    print(row)
    classifier.tweet_set_state(
        tweet_id=row[0],
        state=PROCESSING_STAGES.UNPROCESSED
    )
    n += 1
    if n > 9:
        break

# In[7]:

page = 5
per_page = 5
classifier.display_accepted(page=page, per_page=per_page)

# In[ ]:

from datetime import datetime
from time import sleep
import logging

# Poll loop: pull a batch of 150 tweets at most once every 15 minutes,
# stopping on the first error raised by preprocess_batch.
last_pull = datetime.now().timestamp() - 900
current_time = datetime.now().timestamp()  # FIX: dropped unused `end` binding
while True:
    if current_time - last_pull > 900:
        start_pull = datetime.now().timestamp()
        try:
            classifier.preprocess_batch(n=150)
        except Exception as err:
            logging.error(err)
            break
        # Average the download time to the middle of the transaction.
        last_pull = (start_pull + datetime.now().timestamp()) / 2.0
    else:
        current_time = datetime.now().timestamp()
        # sleep for time left for 15 minutes
        # FIX: clamp at 0 — time.sleep raises ValueError on negative input,
        # which can happen if the 900 s window elapses between timestamps.
        sleep(max(0.0, 900 - (current_time - last_pull)))
        current_time = datetime.now().timestamp()

# In[8]:

classifier.preprocess_batch(n=250)

# In[10]:

# Install a pip package in the current Jupyter kernel
import sys
get_ipython().system('{sys.executable} -m pip install tweet-requester')