#' Clean, tokenise and stem a character vector of tweets.
#'
#' Each tweet is lower-cased and stripped of URLs, @usernames, non-ASCII
#' characters and punctuation, and runs of whitespace are collapsed. Two views
#' of the cleaned text are then produced:
#' * a token table (one row per word, via `tidytext::unnest_tokens()`) with
#'   stop words and words of two characters or fewer removed;
#' * one string per input tweet made of its stemmed, de-duplicated tokens,
#'   with stop words and tokens of two characters or fewer removed.
#'
#' @param tweet A character vector (no dimensions), one element per tweet.
#' @return A list with two elements:
#'   \describe{
#'     \item{tweets_df0}{Tibble of unstemmed tokens in a `word` column.}
#'     \item{tweets_df1}{Character vector with one space-separated string of
#'       stemmed tokens per input tweet; `""` when no token survives.}
#'   }
process_tweet <- function(tweet) {
  if (missing(tweet)) {
    stop("tweet should be character vector.")
  }
  if (!is.character(tweet) || !is.null(dim(tweet))) {
    stop("tweet should be character vector.")
  }

  # Element names are irrelevant to the result; drop them so they cannot leak
  # into the document names used by the tokeniser.
  text <- unname(tolower(tweet))

  # Remove URLs
  text <- gsub("http\\S+|www\\S+", "", text)

  # Remove usernames
  text <- gsub("@\\w+", "", text)

  # Remove emoticons (drops every non-ASCII character)
  text <- gsub("[^\x01-\x7F]", "", text)

  # Replace punctuation with a space. POSIX classes are used on purpose:
  # shorthand escapes such as \s are not portable inside a bracket expression.
  text <- gsub("[^[:alnum:][:space:]]", " ", text)

  # Collapse runs of whitespace
  text <- gsub("\\s+", " ", text)

  stop_terms <- tidytext::stop_words$word

  # Token table: one row per word, minus stop words and short words.
  # Written without a pipe so the function does not depend on `%>%` being
  # attached by the caller.
  token_tbl <- tidytext::unnest_tokens(
    tidyr::tibble(tweet = text), word, tweet
  )
  tweets_df0 <- dplyr::filter(
    token_tbl,
    !word %in% stop_terms & nchar(word) > 2
  )

  # Per-tweet stemmed tokens.
  # NOTE(review): stemming runs before the stop-word filter, so a stop word
  # whose stem no longer matches the list is not removed. Kept as-is to
  # preserve existing output; confirm whether that order is intended.
  stemmed <- quanteda::tokens_wordstem(
    quanteda::tokens(text),
    language = "english"
  )

  # lapply() always yields one list element per tweet. sapply() would simplify
  # to a matrix when all tweets have the same token count, which mixes the
  # tokens of different tweets together.
  per_tweet <- lapply(unname(as.list(stemmed)), function(terms) {
    terms <- unique(terms[!terms %in% stop_terms])
    terms[nchar(terms) > 2]
  })

  # One space-separated string per tweet.
  tweets_df1 <- vapply(per_tweet, paste, character(1), collapse = " ")

  list(tweets_df0 = tweets_df0, tweets_df1 = tweets_df1)
}
