By @mhawksey

Collect/backup tweets in a Google Spreadsheet on one sheet

A quick recap: in June 2010 I hastily threw together a way to archive tweets using Google Spreadsheets. It worked, but it was a bit flaky. In February 2011, having learned a lot more about Google Spreadsheets and Google Apps Script, I reworked the solution, producing version 2: Collect/backup tweets in a Google Spreadsheet.

I would like to say that this idea has gone down a storm, but … well, according to the Docs Template Gallery it's got 17 users – a 5-star rating though 😉 that might be my vote 🙁 I wonder if the number is so low because people can make a copy of the spreadsheet from the link in the original post.

Ignoring this overwhelming success, there was one tweak I had been meaning to make for a while. Currently the spreadsheet dumps the results from its search into a new sheet each time it runs, which means you can end up with lots of duplicate tweets across different sheets. So when Tony Hirst recently asked:

The time felt right to do something about it. To get the ball rolling, Tony also kindly contributed a code snippet which finds the new tweets in the search results. This solution is a bit greedy: it runs a search collecting up to 1,500 items and then compares them with what is already in the sheet. Meanwhile, I was looking at the Twitter Search API documentation to see if I could get the API itself to filter out previously collected results (the answer was yes, using the since_id parameter). The disadvantage of this approach is that the results can't be paged, so you are limited to 100 new tweets per run.
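For reference, with this approach the filtering happens on Twitter's side rather than in the script. A minimal sketch of the idea in Apps Script, assuming SEARCH_TERM and sinceid have already been read from the spreadsheet (it uses the same old search.twitter.com endpoint as the full getTweets() function below, which adds paging and a date window on top):

  // ask the Search API only for tweets newer than the last one we stored
  var url = "http://search.twitter.com/search.json?q=" + encodeURIComponent(SEARCH_TERM)
            + "&rpp=100&since_id=" + sinceid; // since_id results come back as a single page of up to 100
  var results = Utilities.jsonParse(UrlFetchApp.fetch(url).getContentText()).results;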

The latest version of the spreadsheet has been included in the Collect/backup tweets in a Google Spreadsheet post (it defaults to using the Twitter Search API's since_id filtering to pick up from the last collected tweet, but the greedy version is included, commented out, in the source code). Below is a copy of the main parts of the code. Thanks to Tony for the prod and the code suggestion!

function collectTweets() {
  // if continuous sheetname = archive else make a name
  if (RESULT_TYPE == "continuous"){
    var sheetName = "Archive";
  } else {
    var sheetName = Utilities.formatDate(new Date(), "GMT", "dd-MMM-yy hh:mm"); // make a new sheet name based on today's date
  }
  // if the sheet doesn't exist, make it from the TMP template sheet
  if (!ss.getSheetByName(sheetName)){
    var temp = ss.getSheetByName("TMP");
    var sheet = ss.insertSheet(sheetName, {template:temp});
  } else {
    var sheet = ss.getSheetByName(sheetName);
  }
  // if the sheet already has rows, get the id of the most recently stored tweet
  var stored = getRowsData(sheet);
  if (stored.length >0 ){
    var sinceid = stored[0]["id"];
  }
  // prepare the search date window (defaults to yesterday; weekly widens it to the last 7 days)
  var myUntil = new Date();
  myUntil.setDate(myUntil.getDate()-1);
  var mySince = new Date(myUntil);
  if (SEARCH_DURATION == "weekly"){
    myUntil.setDate(myUntil.getDate()+1);
    mySince.setDate(mySince.getDate()-6);
  }
  // if we already have a since id, only ask Twitter for tweets newer than it
  if (sinceid){
    var data = getTweets(SEARCH_TERM, mySince, myUntil, NUMBER_OF_TWEETS, sinceid); // get results from twitter sinceid
  } else {
    var data = getTweets(SEARCH_TERM, mySince, myUntil, NUMBER_OF_TWEETS); // get results from twitter
  }
  // if some data insert rows
  if (data.length > 0){
    sheet.insertRowsAfter(1, data.length);
    setRowsData(sheet, data);
  }
  /*
  // This is a very greedy grab (since_id is limited to a max of 100 results, this method gives you up to 1500)
  // fetch everything the search will return, then keep only the tweets newer than the stored since id
  var objects = getTweets(SEARCH_TERM, mySince, myUntil, NUMBER_OF_TWEETS);
  var data = [];
  if (sinceid){
    for (var i = 0; i < objects.length; i++){
      var row = objects[i];
      if (row["id"] > sinceid){
        data.push(row);
      }
    }
  } else {
    data = objects;
  }
  if (data.length > 0){
    sheet.insertRowsAfter(1, data.length);
    setRowsData(sheet, data);
  }
  */
}
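The script also leans on getRowsData() and setRowsData(), helper functions included in the spreadsheet's source (in the style of Google's Apps Script "Reading Spreadsheet data using JavaScript Objects" tutorial). They map sheet rows to and from JavaScript objects keyed by the header row, which is why stored[0]["id"] above gives the id of the most recently stored tweet. A simplified sketch of what getRowsData() does (the real helpers in the spreadsheet are more general; getRowsDataSketch is just an illustrative name):

function getRowsDataSketch(sheet) {
  // read the header row plus the data rows, return an array of objects keyed by the headers
  var values = sheet.getDataRange().getValues();
  var headers = values[0];
  var objects = [];
  for (var r = 1; r < values.length; r++) {
    var obj = {};
    for (var c = 0; c < headers.length; c++) {
      obj[headers[c]] = values[r][c];
    }
    objects.push(obj);
  }
  return objects;
}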
function getTweets(searchTerm, since, until, maxResults, sinceid, languageCode) {
    //Based on Mikael Thuneberg getTweets - mod by mhawksey to convert to json
    // if you include setRowsData this can be used to output chosen entries
    try {
        var pagenum = 1;
      var data =[];
      var idx = 0;
      var sinceurl ="";
        if (typeof maxResults == "undefined") {
            maxResults = 100;
        }
        if (maxResults > 1500) {
            maxResults = 1500;
        }
        if (maxResults > 100) {
            resultsPerPage = 100;
            maxPageNum = maxResults / 100;
        } else {
            resultsPerPage = maxResults;
            maxPageNum = 1;
        }
        if (typeof sinceid != "undefined") {
            if (sinceid.length > 0) {
                resultsPerPage = 100;
                maxPageNum = 1;
                sinceurl = "&since_id=" + sinceid;
            }
        }
        Logger.log(twDate(since)+" "+twDate(until));
        searchTerm = encodeURIComponent(searchTerm);
        for (pagenum = 1; pagenum <= maxPageNum; pagenum++) {
            // build the old search.twitter.com Search API request for this page
            var URL = "http://search.twitter.com/search.json?q=" + searchTerm + "&since=" + twDate(since) + "&until=" + twDate(until) + "&rpp=" + resultsPerPage + "&page=" + pagenum + sinceurl;
            if (typeof languageCode != "undefined") {
                if (languageCode.length > 0) {
                    URL = URL + "&lang=" + languageCode;
                }
            }
            var response = UrlFetchApp.fetch(URL, {method:'get', headers: { "User-Agent": REFERER}});
            if (response.getResponseCode() == 200) {
              var objects = Utilities.jsonParse(response.getContentText()).results;
              for (i in objects){ // not pretty but I wanted to extract geo data
                if (objects[i].geo != null){
                  objects[i]["geo_coordinates"] = "loc: "+objects[i].geo.coordinates[0]+","+objects[i].geo.coordinates[1];
                }
                objects[i]["status_url"] = "http://twitter.com/"+objects[i].from_user+"/statuses/"+objects[i].id_str;
                data[idx]=objects[i];
                idx ++;
              }
            }
          }
    return data;
    } catch (e) {
        return e.message;
    }
}
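One thing not shown above: the archive only keeps itself up to date if collectTweets() is run on a schedule. If you copy the code rather than the spreadsheet, below is a minimal sketch of adding an hourly time-driven trigger from the script itself (you can just as easily add one from the script editor's triggers dialog; setCollectionTrigger is only an illustrative name):

function setCollectionTrigger() {
  // run collectTweets() every hour via a time-driven trigger
  ScriptApp.newTrigger("collectTweets")
      .timeBased()
      .everyHours(1)
      .create();
}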