Windows and Unix-like filesystems differ in file name encoding and length limits: NTFS allows 255 UCS-2 characters (= 510 bytes), while Unix-like filesystems allow a UTF-8 string of at most 255 bytes. As a result, long non-ASCII file names that are valid on NTFS may be invalid on Unix-like systems. This patch enables automatic generation of shortened file names that are always valid on Unix filesystems (tested with ZFS).
Unfortunately, I could not successfully explain this issue to the Transmission developers.
This patch makes Transmission generate shortened names for such files on the fly.
--- utils.c-1 2014-05-25 10:24:31.238401512 +0200
+++ utils.c 2014-05-25 10:24:31.139399184 +0200
@@ -368,6 +368,10 @@
return 0;
}
+#ifdef UTF8NAMES_DEBUG
+FILE *name_log=NULL;
+#endif
+
char*
tr_buildPath (const char *first_element, ...)
{
@@ -377,6 +381,17 @@
va_list vl;
size_t bufLen = 0;
+#ifdef UTF8NAMES
+ /*size_t lastLen;*/
+ char *comp_start, *comp_end; // positions of current path component
+ unsigned char checksum = 0;
+#endif
+#ifdef UTF8NAMES_DEBUG
+ if( name_log == NULL) {
+ name_log = fopen("/tmp/name_debug.log", "wb");
+ }
+#endif
+
/* pass 1: allocate enough space for the string */
va_start (vl, first_element);
element = first_element;
@@ -410,6 +425,58 @@
/* sanity checks & return */
assert (pch - buf == (off_t)bufLen);
+
+#ifdef UTF8NAMES
+ // walk all path components and shrink them to no more than 255 bytes
+ for(comp_start = buf;;) {
+ comp_end = strchrnul(comp_start, TR_PATH_DELIMITER); // points to separator or terminal zero
+ if (comp_end-comp_start > 255) {
+ // do shrink
+ /* if name_length>255
+ ... find start of extension, let it be a string end
+ ... cut from file name by excess length
+ ... if first char after cut is 0b10xxxxxx (UTF-8 continuation) cut more until start of UTF-8 character
+ */
+ size_t excess=comp_end-comp_start-255+2; /* 2 bytes for control sum */
+ char *extPos;
+ char *cutPos;
+#ifdef UTF8NAMES_DEBUG
+ fprintf(name_log, "long name at %d len %ld buflen %ld: %s\n", comp_start-buf, comp_end-comp_start, bufLen, comp_start);
+#endif
+ extPos=comp_end;
+ while( extPos>comp_start && *(extPos-1) != '.' ) { /* finish after dot or at string start */
+ extPos--;
+ }
+ if( extPos<(comp_start+excess+4) ) { /* +4 - reserve for utf-8 truncating */
+ /* too long extension or dot was not found, reset to end of string */
+ extPos=comp_end;
+ } else {
+ extPos--; /* go to point to dot */
+ }
+
+ cutPos = extPos -excess;
+ while( cutPos>comp_start && ((*cutPos&0xC0)==0x80) ) { /* remove UTF-8 continuations */
+ cutPos--;
+ }
+ for( char *counter = cutPos; counter<=extPos; counter++ ) { // sum removed bytes
+ checksum ^= *counter;
+ }
+ sprintf( cutPos, "%02X", checksum );
+ {
+ char *x=cutPos+2, *y=extPos;
+ while (*x++=*y++); // copying overlapped strings
+ }
+ comp_end -= extPos-cutPos-2;
+#ifdef UTF8NAMES_DEBUG
+ fprintf( name_log, "Cutted %d at %d ext %d result: %s\n", excess, cutPos-buf, extPos-buf, buf );
+#endif
+ };
+
+ if (*comp_end == 0) break; // that was last
+ comp_start = comp_end+1; // set next component to next char after separator
+ };
+#endif
+
return buf;
}