Windows and Unix-like operating systems differ in filename encoding and length limits: NTFS allows 255 UCS-2 characters (= 510 bytes), whereas Unix-like filesystems limit a name to a UTF-8 string of at most 255 bytes. As a result, long non-ASCII filenames that are valid on NTFS may be invalid on Unix-like systems. This patch automatically generates shortened filenames that are always valid on Unix filesystems (tested with ZFS).
Unfortunately, I was not able to explain this issue successfully to the Transmission developers.
This patch makes Transmission generate shortened names for such files on the fly.
--- utils.c-1 2014-05-25 10:24:31.238401512 +0200 +++ utils.c 2014-05-25 10:24:31.139399184 +0200 @@ -368,6 +368,10 @@ return 0; } +#ifdef UTF8NAMES_DEBUG +FILE *name_log=NULL; +#endif + char* tr_buildPath (const char *first_element, ...) { @@ -377,6 +381,17 @@ va_list vl; size_t bufLen = 0; +#ifdef UTF8NAMES + /*size_t lastLen;*/ + char *comp_start, *comp_end; // positions of current path component + unsigned char checksum = 0; +#endif +#ifdef UTF8NAMES_DEBUG + if( name_log == NULL) { + name_log = fopen("/tmp/name_debug.log", "wb"); + } +#endif + /* pass 1: allocate enough space for the string */ va_start (vl, first_element); element = first_element; @@ -410,6 +425,58 @@ /* sanity checks & return */ assert (pch - buf == (off_t)bufLen); + +#ifdef UTF8NAMES + // walk all path records and shrink then to no more than 255 bytes + for(comp_start = buf;;) { + comp_end = strchrnul(comp_start, TR_PATH_DELIMITER); // points to separator or terminal zero + if (comp_end-comp_start > 255) { + // do shrink + /* if name_length>255 + ... find start of extension, let it be a string end + ... cut from file name by excess length + ... if first char after cut is 0b10xxxxxx (UTF-8 continuation) cut more until start of UTF-8 character + */ + size_t excess=comp_end-comp_start-255+2; /* 2 bytes for control sum */ + char *extPos; + char *cutPos; +#ifdef UTF8NAMES_DEBUG + fprintf(name_log, "long name at %d len %ld buflen %ld: %s\n", comp_start-buf, comp_end-comp_start, bufLen, comp_start); +#endif + extPos=comp_end; + while( extPos>comp_start && *(extPos-1) != '.' 
) { /* finish after dot or at string start */ + extPos--; + } + if( extPos<(comp_start+excess+4) ) { /* +4 - reserve for utf-8 truncating */ + /* too long extension or dot was not found, reset to end of string */ + extPos=comp_end; + } else { + extPos--; /* go to point to dot */ + } + + cutPos = extPos -excess; + while( cutPos>comp_start && ((*cutPos&0xC0)==0x80) ) { /* remove UTF-8 continuations */ + cutPos--; + } + for( char *counter = cutPos; counter<=extPos; counter++ ) { // sum removed bytes + checksum ^= *counter; + } + sprintf( cutPos, "%02X", checksum ); + { + char *x=cutPos+2, *y=extPos; + while (*x++=*y++); // copying overlapped strings + } + comp_end -= extPos-cutPos-2; +#ifdef UTF8NAMES_DEBUG + fprintf( name_log, "Cutted %d at %d ext %d result: %s\n", excess, cutPos-buf, extPos-buf, buf ); +#endif + }; + + if (*comp_end == 0) break; // that was last + comp_start = comp_end+1; // set next component to next char after separator + }; +#endif + return buf; }