Google Sheets Function: unsign Vietnamese text

I recently received a request from someone at Hanoi Medical University to create a script that would ‘unsign’ Vietnamese text in a Google Sheet. They are using Google Sheets to automatically create email addresses for people who work there, and they were running into a problem with all of the accents, tones and signs that are common in Vietnamese.

For example:

“Bùi Thị Kim” needed to be rendered as an email: “buithikim@hmu.edu.vn”

They sent me a VBA script that works with Excel, and I converted it to Google Apps Script.

To use this, just open up Google Sheets, click Tools, Script Editor, delete whatever you see in the template and paste in the code below.

The basic use of the function is “=ConvertToUnSign(string)”. If you want to do some additional modifications to make it into an email like the one above, try this function:

=CONCATENATE(SUBSTITUTE(LOWER(ConvertToUnSign(C2))," ",""),"@hmu.edu.vn")

If you find it helpful, let me know in the comment section below!

/**
 * Removes Vietnamese accents, tone marks and signs from text, e.g.
 * "Bùi Thị Kim" becomes "Bui Thi Kim".
 *
 * Each UTF-16 code unit is passed through removeSign(); combining diacritical
 * marks (U+0300-U+036F) are dropped so that decomposed input, where the tone
 * mark is a separate character after the base letter, is unsigned as well.
 *
 * @param {*} sContent Text to convert. Arrays (which is how a spreadsheet
 *     range is handed to a custom function) are converted element by element;
 *     other non-string values are converted to their string form first.
 * @return {string|Array} The unsigned text, or an array of the same shape as
 *     the input when an array was given. null/undefined yield "".
 * @customfunction
 */
function ConvertToUnSign(sContent) {
  // A range argument arrives as a (nested) array: convert it cell by cell.
  if (Array.isArray(sContent)) {
    return sContent.map(function (cell) {
      return ConvertToUnSign(cell);
    });
  }

  // Nothing to convert; avoid producing the literal text "null"/"undefined".
  if (sContent === null || sContent === undefined) {
    return "";
  }

  var text = String(sContent);
  var unSignChars = [];
  // `var i` keeps the counter local; the undeclared `i` used previously leaked
  // into the global scope and throws under strict mode.
  for (var i = 0; i < text.length; i++) {
    var code = text.charCodeAt(i);
    // Skip combining diacritical marks left over from decomposed text.
    if (code >= 0x0300 && code <= 0x036f) {
      continue;
    }
    unSignChars.push(String.fromCharCode(removeSign(code)));
  }
  return unSignChars.join("");
}

// Lookup table: every entry lists the accented Vietnamese code units ("signed")
// that collapse to a single unaccented ASCII code unit ("plain").
var UNSIGN_GROUPS = [
  { plain: 100, signed: [273] }, // 'd'
  { plain: 68, signed: [272] }, // 'D'
  { // 'a'
    plain: 97,
    signed: [224, 225, 226, 227, 259, 7841, 7843, 7845, 7847, 7849, 7851, 7853, 7855, 7857, 7859, 7861, 7863]
  },
  { // 'A'
    plain: 65,
    signed: [192, 193, 194, 195, 258, 7840, 7842, 7844, 7846, 7848, 7850, 7852, 7854, 7856, 7858, 7860, 7862]
  },
  { // 'e'
    plain: 101,
    signed: [232, 233, 234, 7865, 7867, 7869, 7871, 7873, 7875, 7877, 7879]
  },
  { // 'E'
    plain: 69,
    signed: [200, 201, 202, 7864, 7866, 7868, 7870, 7872, 7874, 7876, 7878]
  },
  { plain: 105, signed: [236, 237, 297, 7881, 7883] }, // 'i'
  { plain: 73, signed: [204, 205, 296, 7880, 7882] }, // 'I'
  { // 'o'
    plain: 111,
    signed: [242, 243, 244, 245, 417, 7885, 7887, 7889, 7891, 7893, 7895, 7897, 7899, 7901, 7903, 7905, 7907]
  },
  { // 'O'
    plain: 79,
    signed: [210, 211, 212, 213, 416, 7884, 7886, 7888, 7890, 7892, 7894, 7896, 7898, 7900, 7902, 7904, 7906]
  },
  { // 'u'
    plain: 117,
    signed: [249, 250, 361, 432, 7909, 7911, 7913, 7915, 7917, 7919, 7921]
  },
  { // 'U'
    plain: 85,
    signed: [217, 218, 360, 431, 7908, 7910, 7912, 7914, 7916, 7918, 7920]
  },
  { plain: 121, signed: [253, 7923, 7925, 7927, 7929] }, // 'y'
  { plain: 89, signed: [221, 7922, 7924, 7926, 7928] } // 'Y'
];

/**
 * Maps the code unit of an accented Vietnamese letter to the code unit of its
 * unaccented ASCII base letter.
 *
 * @param {number} char Character code to look up.
 * @return {number} The ASCII code of the base letter when `char` is listed in
 *     the table; otherwise `char` itself, unchanged.
 */
function removeSign(char) {
  for (var g = 0; g < UNSIGN_GROUPS.length; g++) {
    var group = UNSIGN_GROUPS[g];
    // indexOf compares with strict equality, the same rule a switch uses.
    if (group.signed.indexOf(char) !== -1) {
      return group.plain;
    }
  }
  return char;
}

Leave a comment

Your email address will not be published. Required fields are marked *