• 7 Posts
  • 6 Comments
Joined 10 months ago
cake
Cake day: March 9th, 2024

help-circle

  • This is the decode function if anyone is interested:

    # decoded_reference REF [EPOCH]
    #
    # Decodes an encoded reference (e.g. "dk-wy") and prints
    # "<year>-<sequence number>" on stdout.
    #
    # Arguments:
    #   $1 - encoded reference. Characters 0-1 carry the year, characters 3-4
    #        carry the sequence number; character 2 (the separator) and anything
    #        after character 4 are ignored.
    #   $2 - optional multiple of ln_symbolset² that is added back to the year
    #        (default 2). Only two year symbols are stored, so the high-order
    #        part of the year cannot be recovered from the reference itself.
    #        With a 31-symbol set the default covers the years 1922-2882.
    # Globals (read only):
    #   symbolset            - indexed array of the symbols
    #   ln_symbolset         - number of symbols (falls back to ${#symbolset[@]})
    #   lookup_table_reverse - associative array: encoded sequence symbol → symbol
    # Returns:
    #   0 on success; 1 with a message on stderr when the reference is too
    #   short, the epoch is not a number, or a symbol cannot be resolved.
    decoded_reference()
    {
        local -r ref=$1
        local -r yr_epoch=${2:-2}
        local -r base=${ln_symbolset:-${#symbolset[@]}}

        if (( ${#ref} < 5 ))
        then
            printf 'decoded_reference: reference too short: %s\n' "$ref" >&2
            return 1
        fi
        if [[ ! $yr_epoch =~ ^(0|[1-9][0-9]*)$ ]]
        then
            printf 'decoded_reference: invalid year epoch: %s\n' "$yr_epoch" >&2
            return 1
        fi

        local -r yr_msd=${ref:0:1}
        local -r yr_lsd=${ref:1:1}
        local -r seq_enc_msd=${ref:3:1}
        local -r seq_enc_lsd=${ref:4:1}

        # "@" and "*" would be taken as whole-array subscripts below, so refuse
        # them up front instead of decoding garbage.
        local ch
        for ch in "$yr_msd" "$yr_lsd" "$seq_enc_msd" "$seq_enc_lsd"
        do
            if [[ $ch == [@*] ]]
            then
                printf 'decoded_reference: invalid symbol in reference: %s\n' "$ref" >&2
                return 1
            fi
        done

        # Build symbol → index once, rather than forking typeset|grep per symbol.
        local -A index_of=()
        local i
        for i in "${!symbolset[@]}"
        do
            index_of["${symbolset[i]}"]=$i
        done

        # Undo the sequence-number substitution.
        local -r seq_msd=${lookup_table_reverse[$seq_enc_msd]}
        local -r seq_lsd=${lookup_table_reverse[$seq_enc_lsd]}
        if [[ -z $seq_msd || -z $seq_lsd || $seq_msd == [@*] || $seq_lsd == [@*] ]]
        then
            printf 'decoded_reference: unknown sequence symbol in reference: %s\n' "$ref" >&2
            return 1
        fi

        local -r seq_msd_index=${index_of[$seq_msd]}
        local -r seq_lsd_index=${index_of[$seq_lsd]}
        local -r yr_msd_index=${index_of[$yr_msd]}
        local -r yr_lsd_index=${index_of[$yr_lsd]}
        if [[ -z $seq_msd_index || -z $seq_lsd_index || -z $yr_msd_index || -z $yr_lsd_index ]]
        then
            printf 'decoded_reference: symbol not in symbol set: %s\n' "$ref" >&2
            return 1
        fi

        # Both values are two-digit numbers in base ln_symbolset; the year
        # additionally gets the epoch offset added back.
        local -r seq=$((seq_msd_index * base + seq_lsd_index))
        local -r yr=$((base * base * yr_epoch + yr_msd_index * base + yr_lsd_index))

        printf '%s\n' "${yr}-$seq"
    };#decoded_reference
    

  • I probably need a perfect hash function. This code seems to do the job:

    # encoded_reference YEAR SEQNO
    #
    # Prints "answer → <yy>-<ss>", where <yy> encodes YEAR and <ss> encodes
    # SEQNO, each as two symbols from a 31-character alphabet that leaves out
    # i, l, o, 0 and 1.
    #
    # Arguments:
    #   $1 - year, non-negative decimal integer. Only YEAR mod ln_symbolset²
    #        is encoded, so years that differ by a multiple of 961 collide.
    #   $2 - sequence number, decimal integer in the range 0 .. ln_symbolset² - 1.
    # Outputs:
    #   the encoded reference on stdout; diagnostics on stderr.
    # Returns:
    #   0 on success; 1 if an argument is not a non-negative integer or the
    #   sequence number does not fit in two symbols.
    encoded_reference()
    {
        local -r yr=$1
        local -r seqno=$2

        local -ar symbolset=(a b c d e f g h   j k   m n   p q r s t u v w x y z     2 3 4 5 6 7 8 9)
        local -r ln_symbolset=${#symbolset[@]}; # 31

        if [[ ! $yr =~ ^[0-9]+$ || ! $seqno =~ ^[0-9]+$ ]]
        then
            printf 'encoded_reference: year and seqno must be non-negative integers\n' >&2
            return 1
        fi

        # Force base 10 so zero-padded input is not parsed as octal.
        local -r yr_num=$((10#$yr))
        local -r seq_num=$((10#$seqno))

        if (( seq_num >= ln_symbolset * ln_symbolset ))
        then
            printf 'encoded_reference: seqno %s does not fit in two symbols (max %s)\n' \
                "$seqno" "$((ln_symbolset * ln_symbolset - 1))" >&2
            return 1
        fi

        # Build a fixed permutation of the symbol set: for every symbol pick the
        # element at position (50 mod remaining) from the seeds still unused.
        local -a seedset=("${symbolset[@]}")
        local -A lookup_table=()
        local sym pos
        for sym in "${symbolset[@]}"
        do
            pos=$((50 % ${#seedset[@]})); # 50 is just an arbitrary static number
            lookup_table["$sym"]=${seedset[pos]}
            unset 'seedset[pos]'
            seedset=("${seedset[@]}"); # re-pack so indices stay contiguous
        done

        # The year is written as two plain base-31 digits.
        local -r yr_enc=${symbolset[$(((yr_num / ln_symbolset) % ln_symbolset))]}${symbolset[$((yr_num % ln_symbolset))]}

        # The sequence number is two base-31 digits passed through the permutation.
        local -r most_sig_fig=$((seq_num / ln_symbolset))
        local -r least_sig_fig=$((seq_num % ln_symbolset))
        local -r seq_enc=${lookup_table[${symbolset[most_sig_fig]}]}${lookup_table[${symbolset[least_sig_fig]}]}

        printf '%s\n' "answer → ${yr_enc}-$seq_enc"
    };#encoded_reference
    
    # Demo: print the encoded reference for sequence numbers 1-20 of the
    # years 2024, 2025 and 2026, one per line.
    for yr in {2024..2026}; do
        for seqno in {1..20}; do
            encoded_reference "$yr" "$seqno"
        done
    done
    
    output

    answer → js-wy answer → js-w2 answer → js-w4 answer → js-w6 answer → js-w8 answer → js-wa answer → js-wd answer → js-wg answer → js-wk answer → js-wp answer → js-ws answer → js-wv answer → js-w3 answer → js-w9 answer → js-we answer → js-wm answer → js-wt answer → js-w5 answer → js-wf answer → js-wr answer → jt-wy answer → jt-w2 answer → jt-w4 answer → jt-w6 answer → jt-w8 answer → jt-wa answer → jt-wd answer → jt-wg answer → jt-wk answer → jt-wp answer → jt-ws answer → jt-wv answer → jt-w3 answer → jt-w9 answer → jt-we answer → jt-wm answer → jt-wt answer → jt-w5 answer → jt-wf answer → jt-wr answer → ju-wy answer → ju-w2 answer → ju-w4 answer → ju-w6 answer → ju-w8 answer → ju-wa answer → ju-wd answer → ju-wg answer → ju-wk answer → ju-wp answer → ju-ws answer → ju-wv answer → ju-w3 answer → ju-w9 answer → ju-we answer → ju-wm answer → ju-wt answer → ju-w5 answer → ju-wf answer → ju-wr

    This is close to ideal, but I just thought of another problem: what if a year-seq pair were to derive an encoded number like “fy-ou” or “us-uk” or “sh-it”? A bias that nearly ensures a digit is used would help avoid generating offending words. But I guess I’m getting well into over-engineering territory.


  • That is certainly a winner from the standpoint of code simplicity. And it’s trivially reversible. But I’m also prioritizing simplicity for human recipients above code simplicity. Base64 output is case-sensitive, and someone writing back and referencing a ref number would not necessarily preserve case. It’s also intolerant of human errors like confusing a “1” with an “l”.

    (edit) I think base32 would avoid the case sensitivity problem. So here’s a sample:

    # Demo: show "2024-<seq>" next to its lower-cased, unpadded base32 form.
    # The encoded text includes the trailing newline that printf emits.
    for seq in {1..60}; do
        printf '%s → ' "2024-$seq"
        printf '%s\n' "2024-$seq" | base32 | tr -d '=' | tr '[:upper:]' '[:lower:]'
    done
    
    output:
    2024-1 → giydenbngefa
    2024-2 → giydenbngifa
    2024-3 → giydenbngmfa
    2024-4 → giydenbngqfa
    2024-5 → giydenbngufa
    2024-6 → giydenbngyfa
    2024-7 → giydenbng4fa
    2024-8 → giydenbnhafa
    2024-9 → giydenbnhefa
    2024-10 → giydenbngeyau
    2024-11 → giydenbngeyqu
    2024-12 → giydenbngezau
    2024-13 → giydenbngezqu
    2024-14 → giydenbnge2au
    2024-15 → giydenbnge2qu
    2024-16 → giydenbnge3au
    2024-17 → giydenbnge3qu
    2024-18 → giydenbnge4au
    2024-19 → giydenbnge4qu
    2024-20 → giydenbngiyau
    2024-21 → giydenbngiyqu
    2024-22 → giydenbngizau
    2024-23 → giydenbngizqu
    2024-24 → giydenbngi2au
    2024-25 → giydenbngi2qu