By @mhawksey

Collect/backup tweets in a Google Spreadsheet on one sheet

A quick recap in June 2010 I hastily threw together a way to archive tweets using Google Spreadsheets. This was okay but a bit flaky. In February (2011) having learned a lot more about Google Spreadsheets and Google Apps Script I reworked the solution producing version 2 Collect/backup tweets in a Google Spreadsheet.

I would like to say that this idea has gone down like a storm, but … well according to the Docs Template Gallery it's got 17 users – 5-star rating though 😉 that might be my vote 🙁 I wonder if this is so low because people can make a copy of the spreadsheet from the link in the original post?

Ignoring this overwhelming success there was one tweak I was meaning to do for a while. Currently the Spreadsheet dumps the results from its search into a new sheet each time it runs. This means you can end up with lots of duplicate tweets across different sheets. So when Tony Hirst recently asked:

@mhawksey do you have a twitterlytics archiver that appends tweets to one sheet rather than form a new sheet for each run? 28 Apr 2011

the time felt right to do something about this. To get the ball rolling Tony also kindly contributed a coding snippet which finds new tweets from the search results. This solution is a bit greedy, running a search collecting up to 1500 items then comparing with what is already there. Meanwhile I was looking at the Twitter Search API documentation to see if I could filter the results to remove previous results (the answer was yes). The disadvantage of this solution is the results can't be paged so you are limited to 100 results.

The latest version of the spreadsheet has been included in the Collect/backup tweets in a Google Spreadsheet post (it defaults to using the twitter API to work out the last tweet, but the greedy version is included in the source code). Below is a copy of the main bits of the code. Thanks to Tony for the prod and code suggestion!

function collectTweets() {
  // Collect tweets into the spreadsheet: either appended to a single rolling
  // "Archive" sheet (RESULT_TYPE == "continuous") or written to a fresh
  // timestamped sheet on every run. Relies on file-level globals: ss,
  // RESULT_TYPE, SEARCH_DURATION, SEARCH_TERM, NUMBER_OF_TWEETS, and the
  // getRowsData/setRowsData helpers.
  var sheetName;
  if (RESULT_TYPE == "continuous"){
    sheetName = "Archive";
  } else {
    // make a new sheet name based on today's date
    sheetName = Utilities.formatDate(new Date(), "GMT", "dd-MMM-yy hh:mm");
  }
  // if a sheet with that name doesn't exist yet, create it from the TMP template
  var sheet = ss.getSheetByName(sheetName);
  if (!sheet){
    var temp = ss.getSheetByName("TMP");
    sheet = ss.insertSheet(sheetName, {template:temp});
  }
  // if we already have archived rows, remember the most recent tweet id so
  // the search can be restricted to newer tweets (Twitter's since_id)
  var stored = getRowsData(sheet);
  var sinceid;
  if (stored.length > 0){
    // newest tweets are inserted at the top, so row 0 holds the latest id
    sinceid = stored[0]["id"];
  }
  // prepare the search window (defaults to a single day ending yesterday)
  var myUntil = new Date();
  myUntil.setDate(myUntil.getDate() - 1);
  var mySince = new Date(myUntil);
  // NOTE(review): the leading space in " weekly" looks accidental — confirm
  // against the value actually read into SEARCH_DURATION from the settings.
  if (SEARCH_DURATION == " weekly"){
    myUntil.setDate(myUntil.getDate() + 1);
    mySince.setDate(mySince.getDate() - 6);
  }
  // fetch results, passing since_id when available so Twitter filters out
  // tweets we already archived (this path is capped at 100 results)
  var data;
  if (sinceid){
    data = getTweets(SEARCH_TERM, mySince, myUntil, NUMBER_OF_TWEETS, sinceid);
  } else {
    data = getTweets(SEARCH_TERM, mySince, myUntil, NUMBER_OF_TWEETS);
  }
  // if we got new tweets, open up rows at the top and write them out
  if (data.length > 0){
    sheet.insertRowsAfter(1, data.length);
    setRowsData(sheet, data);
  }
  /*
  // Alternative "greedy" grab: fetch up to 1500 results and filter locally
  // against the stored since id (the since_id API parameter caps results at
  // 100; this method can recover up to 1500).
  // NOTE(review): this snippet was corrupted when the post was published
  // (stripped "<" characters); reconstructed from the surrounding text —
  // verify against the spreadsheet's actual source before reviving it.
  var objects = getTweets(SEARCH_TERM, mySince, myUntil, NUMBER_OF_TWEETS);
  var data = [];
  if (sinceid){
    for (var i = 0; i < objects.length; i++){
      var row = objects[i];
      if (row["id"] > sinceid){
        data.push(row);
      }
    }
    if (data.length > 0){
      sheet.insertRowsAfter(1, data.length);
    }
  } else {
    data = objects;
  }
  */
}
function getTweets(searchTerm, since, until, maxResults, sinceid, languageCode) {
    // Based on Mikael Thuneberg's getTweets - mod by mhawksey to convert to JSON.
    // Queries the (v1) Twitter Search API for searchTerm between the since and
    // until dates, paging through results at 100 per page up to a hard cap of
    // 1500. When sinceid is supplied, a single page of up to 100 tweets newer
    // than that id is requested instead. Each returned tweet object is
    // augmented with "geo_coordinates" (when geo data is present) and
    // "status_url". Returns an array of tweet objects; on any exception the
    // error message string is returned instead (callers only check .length,
    // which both types have). Relies on file-level twDate() and REFERER.
    try {
        var pagenum = 1;
        var data = [];
        var idx = 0;
        var sinceurl = "";
        if (typeof maxResults == "undefined") {
            maxResults = 100; // default to a single page
        }
        if (maxResults > 1500) {
            maxResults = 1500; // API hard limit: 15 pages of 100
        }
        // was: implicit globals — declare locally
        var resultsPerPage;
        var maxPageNum;
        if (maxResults > 100) {
            resultsPerPage = 100;
            maxPageNum = maxResults / 100;
        } else {
            resultsPerPage = maxResults;
            maxPageNum = 1;
        }
        if (typeof sinceid != "undefined") {
            if (sinceid.length > 0) {
                // since_id queries can't be paged: one page, 100 results max
                resultsPerPage = 100;
                maxPageNum = 1;
                sinceurl = "&since_id=" + sinceid;
            }
        }
        Logger.log(twDate(since) + " " + twDate(until));
        searchTerm = encodeURIComponent(searchTerm);
        // NOTE(review): the loop header and URL construction below were
        // corrupted in the published post (stripped "<" characters) and have
        // been reconstructed from the Thuneberg original this credits —
        // verify parameter names against the spreadsheet's actual source.
        for (pagenum = 1; pagenum <= maxPageNum; pagenum++) {
            var URL = "http://search.twitter.com/search.json";
            URL = URL + "?q=" + searchTerm;
            URL = URL + "&since=" + twDate(since);
            URL = URL + "&until=" + twDate(until);
            URL = URL + "&rpp=" + resultsPerPage;
            URL = URL + "&page=" + pagenum;
            URL = URL + sinceurl;
            if (typeof languageCode != "undefined") {
                if (languageCode.length > 0) {
                    URL = URL + "&lang=" + languageCode;
                }
            }
            var response = UrlFetchApp.fetch(URL, {method:'get', headers: { "User-Agent": REFERER}});
            if (response.getResponseCode() == 200) {
              var objects = Utilities.jsonParse(response.getContentText()).results;
              for (i in objects){ // not pretty but I wanted to extract geo data
                if (objects[i].geo != null){
                  objects[i]["geo_coordinates"] = "loc: "+objects[i].geo.coordinates[0]+","+objects[i].geo.coordinates[1];
                }
                objects[i]["status_url"] = "http://twitter.com/"+objects[i].from_user+"/statuses/"+objects[i].id_str;
                data[idx] = objects[i];
                idx++;
              }
            }
        }
        return data;
    } catch (e) {
        // swallow-and-return kept for backward compatibility: callers treat
        // the return value's .length as "anything to write?"
        return e.message;
    }
}
Exit mobile version