/* ───────────────────────────────────────────────────────────────
formatAndValidateAddress1 –
Checks whether an address1 field looks like a street address (or a PO Box)
and normalises its formatting. Written with US-style addresses in mind.

  input   : the raw field value. Anything that is not a non-blank string
            is treated as invalid.
  returns : the cleaned address string, or null when the value does not
            look like an address (too short, contains "@", ends in a common
            domain suffix, or does not start with a number / directional +
            number / "PO Box").

Normalisation applied: outer quotes and leading "#"/"!" removed, dots dropped,
commas turned into spaces, missing spaces inserted between digits and letters
(ordinals such as "5th" are kept whole), street suffixes / directionals /
unit words abbreviated, and "#" added before a unit number.

Paste into a "JavaScript" node and end the script with:
return formatAndValidateAddress1($(Address));
(replace Address with your column name)
─────────────────────────────────────────────────────────────── */
function formatAndValidateAddress1(input) {
  if (typeof input !== 'string' || !input.trim()) return null;

  /* 0. LIGHT PRE-CLEAN ------------------------------------------- */
  var raw = input.replace(/^[\s"']+|[\s"']+$/g, '');   // outer whitespace / quotes

  /* Reject e-mail addresses and host names here, while the dots are still
     present: they are stripped just below, so a later ".com" test could
     never match. */
  if (/@|\.(com|net|org|gov|edu)$/i.test(raw)) return null;

  var s = raw
    .replace(/^[#!]+/, '')     // leading !/# noise
    .replace(/[.]/g, '')       // drop dots
    .replace(/,/g, ' ')        // every comma becomes a space
    .replace(/\s+/g, ' ')      // collapse runs of whitespace (also after ",,")
    .trim();

  /* 1. INSERT MISSING SPACES ------------------------------------- */
  s = s
    // 1229East -> 1229 East, but leave ordinals (1st, 22nd, 3rd, 5th) intact
    .replace(/(\d)(?!(?:st|nd|rd|th)\b)([A-Za-z])/gi, '$1 $2')
    // Apt4 -> Apt 4, nw9th -> nw 9th
    .replace(/([A-Za-z])(\d)/g, '$1 $2')
    .trim();

  if (s.length < 6) return null;

  /* 2. NORMALISATION DICTS --------------------------------------- */
  var suffixMap = {
    STREET: 'St', ST: 'St', AVENUE: 'Ave', AVE: 'Ave', AV: 'Ave',
    ROAD: 'Rd', RD: 'Rd', BOULEVARD: 'Blvd', BLVD: 'Blvd',
    LANE: 'Ln', LN: 'Ln', DRIVE: 'Dr', DR: 'Dr',
    COURT: 'Ct', CT: 'Ct', TRAIL: 'Trl', TRL: 'Trl',
    PARKWAY: 'Pkwy', PKWY: 'Pkwy', CIRCLE: 'Cir', CIR: 'Cir',
    PLACE: 'Pl', PL: 'Pl', SQUARE: 'Sq', SQ: 'Sq',
    LOOP: 'Loop', TERRACE: 'Ter', TER: 'Ter',
    WAY: 'Way', HWY: 'Hwy', HIGHWAY: 'Hwy',
    CR: 'Cr' /* County Road */
  };
  var directionalMap = {
    NORTH: 'N', SOUTH: 'S', EAST: 'E', WEST: 'W',
    NORTHEAST: 'NE', NORTHWEST: 'NW', SOUTHEAST: 'SE', SOUTHWEST: 'SW',
    N: 'N', S: 'S', E: 'E', W: 'W', NE: 'NE', NW: 'NW', SE: 'SE', SW: 'SW'
  };
  var unitMap = {
    APARTMENT: 'Apt', APT: 'Apt', SUITE: 'Ste', STE: 'Ste',
    UNIT: 'Unit', FLOOR: 'Fl', LOT: 'Lot', BLDG: 'Bldg'
  };

  var ordinalRe = /^\d+(?:st|nd|rd|th)$/i;
  /* A suffix glued onto a street name is only split off at a lower->Upper
     case boundary (MainSt, OakStreet). Splitting on any matching word ending
     would mangle ordinary names: First -> "Fir St", Broadway -> "Broad Way". */
  var glueRe = /^(.*[a-z])([A-Z][A-Za-z]*)$/;

  var tokens = s.split(' ');
  var out = [];

  for (var i = 0; i < tokens.length; i++) {
    var word = tokens[i];
    var upper = word.toUpperCase();

    if (suffixMap[upper])      { out.push(suffixMap[upper]);      continue; }
    if (directionalMap[upper]) { out.push(directionalMap[upper]); continue; }
    if (unitMap[upper])        { out.push(unitMap[upper]);        continue; }
    if (ordinalRe.test(word))  { out.push(word.toLowerCase());    continue; }

    // Capitalise the first letter only; the rest of the word is left as typed.
    var titled = word.replace(/^[A-Za-z]/, function (m) { return m.toUpperCase(); });
    var glued = glueRe.exec(titled);
    var gluedSuffix = glued ? suffixMap[glued[2].toUpperCase()] : undefined;
    if (gluedSuffix) {
      out.push(glued[1]);
      out.push(gluedSuffix);
    } else {
      out.push(titled);
    }
  }

  /* add "#" before a number that follows a unit word (Apt 4 -> Apt #4);
     ordinals are left alone */
  var unitKeys = ['Apt', 'Ste', 'Unit', 'Fl', 'Lot', 'Bldg'];
  for (var u = 0; u < out.length - 1; u++) {
    var next = out[u + 1];
    if (unitKeys.indexOf(out[u]) !== -1 && /^\d/.test(next) && !ordinalRe.test(next)) {
      out[u + 1] = '#' + next;
    }
  }

  var cleaned = out.join(' ');

  /* 3. VALIDATION (lenient) -------------------------------------- */
  // Must start with a number, a directional followed by a number
  // (one- or two-letter: "N 5th", "NW 9th"), or a PO Box.
  var startsNum = /^\d/.test(cleaned) || /^(?:[ns][ew]?|[ew]) \d/i.test(cleaned);
  var looksLikeBox = /^\s*(po|p\.?o\.?)?\s*box\b/i.test(cleaned);
  if (!startsNum && !looksLikeBox) return null;

  return cleaned;
}
/* ---- Easy Data Transform call (only this line goes last) ----
   Passes the Address column value to formatAndValidateAddress1 and returns
   the result from the script; replace Address with your own column name. */
return formatAndValidateAddress1($(Address));
Looks useful, thanks.
I tried it on the "List of real addresses" file from GitHub, plus a few extra bogus rows.
addresses.transform (25.4 KB)
It looks like it is mostly written for US addresses.
In some cases I think you would probably want to keep the commas between street, town, state, etc.