Skip to content

Instantly share code, notes, and snippets.

@Hesamedin
Last active September 5, 2024 17:06
Show Gist options
  • Save Hesamedin/658faa35d5e01227bb81bf382ccfcf5e to your computer and use it in GitHub Desktop.
Save Hesamedin/658faa35d5e01227bb81bf382ccfcf5e to your computer and use it in GitHub Desktop.
BarcodeAnalyser has two methods for working with VINs. The first, extractVIN, is useful when your OCR gives you a passage of text and you want to extract the VIN from it. The second, validate, checks that a given VIN is valid.
/**
* To know more about the Check digit:
* https://en.wikibooks.org/wiki/Vehicle_Identification_Numbers_(VIN_codes)/Check_digit
*
* An interface to test your VIN number:
* https://vpic.nhtsa.dot.gov/decoder/CheckDigit/Index/IJTJHZKFA7M203455
**/
class BarcodeAnalyser {
    /**
     * Extracts a VIN candidate from free-form (typically OCR) text.
     *
     * Only 17-character VINs (the standard for vehicles manufactured since 1981) are
     * considered. Older, shorter formats vary by manufacturer and region and are ignored.
     * A candidate must be a standalone run of exactly 17 ASCII letters/digits: it may not
     * be a slice of a longer alphanumeric run, and it must contain at least one letter and
     * at least one digit. The first token in [text] meeting these criteria is used.
     *
     * Because OCR frequently confuses `0` with `O`/`Q` and `1` with `I` (letters that are
     * not allowed in a VIN), those letters are replaced by the digit they resemble.
     *
     * @param text the text to search.
     * @return the upper-cased, corrected VIN candidate, or an empty string when none is found.
     *   The result is not check-digit validated; use [validate] for that.
     */
    @VisibleForTesting
    fun extractVIN(text: String): String {
        // The regex only matches ASCII letters and digits, so the locale-invariant
        // uppercase() is sufficient here.
        val candidate = VIN_CANDIDATE.findAll(text)
            .map { it.value.uppercase() }
            .firstOrNull { token -> token.any(Char::isLetter) && token.any(Char::isDigit) }
            ?: return ""

        // I, O and Q are never valid in a VIN; map them to the digits OCR most likely misread.
        return candidate
            .replace('I', '1')
            .replace('O', '0')
            .replace('Q', '0')
    }

    /**
     * Performs simple VIN validation: proper length, allowed characters, and a matching
     * check digit (position 9).
     *
     * The check is case-insensitive and never throws; any malformed input yields `false`.
     *
     * @param vin the VIN to validate.
     * @return `true` if [vin] is a well-formed 17-character VIN whose check digit matches,
     *   `false` otherwise.
     */
    @VisibleForTesting
    fun validate(vin: String): Boolean {
        val normalized = vin.lowercase()
        if (!VIN_PATTERN.matches(normalized)) return false

        // Weighted sum of the transliterated characters. The check-digit position itself
        // has weight 0, so its own value never contributes.
        val weightedSum = normalized.foldIndexed(0) { index, acc, char ->
            acc + (TRANSLITERATION[char] ?: 0) * WEIGHTS[index]
        }
        val remainder = weightedSum % 11

        // A remainder of 10 is written as 'x'; otherwise it is the decimal digit itself.
        // Comparing characters (instead of converting position 9 to an Int) keeps an 'x'
        // check digit with a non-10 remainder a plain mismatch rather than an exception.
        val expectedCheckDigit = if (remainder == 10) 'x' else '0' + remainder
        return normalized[CHECK_DIGIT_INDEX] == expectedCheckDigit
    }

    private companion object {
        /** Zero-based index of the check digit (the 9th character). */
        const val CHECK_DIGIT_INDEX = 8

        /** A run of exactly 17 ASCII alphanumerics not embedded in a longer alphanumeric run. */
        val VIN_CANDIDATE = Regex("(?<![a-zA-Z0-9])[a-zA-Z0-9]{17}(?![a-zA-Z0-9])")

        /** Lower-case VIN shape: no i/o/q anywhere, and a digit or 'x' at the check-digit position. */
        val VIN_PATTERN = Regex("[a-hj-npr-z0-9]{8}[0-9x][a-hj-npr-z0-9]{8}")

        /** Numeric value assigned to each allowed (lower-case) VIN character. */
        val TRANSLITERATION: Map<Char, Int> = mapOf(
            '0' to 0, '1' to 1, '2' to 2, '3' to 3, '4' to 4,
            '5' to 5, '6' to 6, '7' to 7, '8' to 8, '9' to 9,
            'a' to 1, 'b' to 2, 'c' to 3, 'd' to 4, 'e' to 5, 'f' to 6, 'g' to 7, 'h' to 8,
            'j' to 1, 'k' to 2, 'l' to 3, 'm' to 4, 'n' to 5, 'p' to 7, 'r' to 9,
            's' to 2, 't' to 3, 'u' to 4, 'v' to 5, 'w' to 6, 'x' to 7, 'y' to 8, 'z' to 9,
        )

        /** Positional weights for the 17 VIN characters. */
        val WEIGHTS: List<Int> = listOf(8, 7, 6, 5, 4, 3, 2, 10, 0, 9, 8, 7, 6, 5, 4, 3, 2)
    }
}
@Hesamedin
Copy link
Author

VIN Validator

This module does simple VIN validation. It checks for proper length, characters, and a valid check digit.
To know more about the Check digit: https://en.wikibooks.org/wiki/Vehicle_Identification_Numbers_(VIN_codes)/Check_digit
An interface to test your VIN: https://vpic.nhtsa.dot.gov/decoder/CheckDigit/Index/IJTJHZKFA7M203455

Usage

import org.junit.Assert.assertEquals
import org.junit.Assert.assertFalse
import org.junit.Assert.assertTrue
import org.junit.Before
import org.junit.Test

/**
 * Unit tests for [BarcodeAnalyser]: VIN extraction from free-form text and VIN validation
 * (length, allowed characters, check digit).
 */
class ExtractVINTest {
    // Re-created before every test so no state is shared between test cases.
    private lateinit var analyzer: BarcodeAnalyser

    @Before
    fun setUp() {
        analyzer = BarcodeAnalyser()
    }

    @Test
    fun `test valid VIN extraction`() {
        // Valid VINs
        assertEquals("W1N0G8EB1LF843123", analyzer.extractVIN("This is the first vin W1N0G8EB1LF843123 I want to test"))
        assertEquals("LVY062ML2NP249146", analyzer.extractVIN("The VIN is LVY062ML2NP249146"))
    }

    @Test
    fun `test VIN with invalid characters replaced`() {
        // Letters that are not allowed in a VIN (I, O, Q) should be replaced by 1, 0, 0
        assertEquals("W1N1G8EB1LF843123", analyzer.extractVIN("This is the vin W1NIG8EB1LF843123 with 'I' replaced"))
        assertEquals("W1N0G8EB1LF843123", analyzer.extractVIN("Another vin W1NOG8EB1LF843123 with 'O' replaced"))
        assertEquals("W1N0G8EB1LF843123", analyzer.extractVIN("And another W1NQG8EB1LF843123 with 'Q' replaced"))
    }

    @Test
    fun `test invalid VIN with only letters or numbers`() {
        // Candidates with only letters or only digits should return empty
        assertEquals("", analyzer.extractVIN("Invalid VIN AAAAAAAAAAAAAAAAA"))
        assertEquals("", analyzer.extractVIN("Invalid VIN 12345678901234567"))
    }

    @Test
    fun `test VIN extraction with spaces`() {
        // A VIN broken up by a space leaves no 17-character run, so nothing is extracted
        assertEquals("", analyzer.extractVIN("Invalid VIN with space W1N0G8EB1L F843123"))
        assertEquals("", analyzer.extractVIN("Another invalid VIN W1N0G8EB1LF8 43123"))
    }

    @Test
    fun `test VIN extraction with punctuation`() {
        // VIN should still extract correctly even if surrounded by punctuation
        assertEquals("W1N0G8EB1LF843123", analyzer.extractVIN("This VIN: W1N0G8EB1LF843123, is valid."))
        assertEquals("LVY062ML2NP249146", analyzer.extractVIN("Check this VIN! LVY062ML2NP249146?"))
    }

    @Test
    fun `test VIN extraction when no valid VIN is present`() {
        // No VIN present in the text
        assertEquals("", analyzer.extractVIN("No VIN here!"))
        assertEquals("", analyzer.extractVIN("Just some random text without a VIN."))
    }

    @Test
    fun `validate should return true for valid VINs`() {
        assertTrue(analyzer.validate("JTJHZKFA7M2034559")) // Valid VIN
        assertTrue(analyzer.validate("SCBCR63W55C024793")) // Valid VIN
        assertTrue(analyzer.validate("1G8ZH5281YZ265849")) // Valid VIN
        assertTrue(analyzer.validate("JH4KA3240JC014910")) // Valid VIN
    }

    @Test
    fun `validate should return false for VINs with invalid length`() {
        assertFalse(analyzer.validate("1HGCM82633A12345"))  // Only 16 characters
        assertFalse(analyzer.validate("1HGCM82633A1234567")) // 18 characters, should be 17
    }

    @Test
    fun `validate should return false for VINs with invalid characters`() {
        // All-digit input is well-formed; it is rejected because the check digit (9) does not match
        assertFalse(analyzer.validate("12345678901234567"))
        assertFalse(analyzer.validate("1HGCM82633A12345@")) // Contains invalid character '@'
    }

    @Test
    fun `validate should return false for VINs with incorrect check digit`() {
        // Well-formed VINs whose 9th character does not match the computed check digit
        assertFalse(analyzer.validate("1HGCM82633A12345G")) // Check digit '3' does not match
        assertFalse(analyzer.validate("JHMCM56557C40445X")) // Check digit '5' does not match; trailing 'X' is an ordinary character
    }
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment