X-Authentication-Warning: delorie.com: mail set sender to djgpp-workers-bounces using -f Date: Thu, 3 Feb 2005 14:35:13 +0300 (MSK) From: "Alexander Aganichev" Sender: aaganichev AT yandex DOT ru Message-Id: <42020C71.000017.28573@tide.yandex.ru> MIME-Version: 1.0 X-Mailer: Yamail [ http://yandex.ru ] To: djgpp-workers AT delorie DOT com Subject: setlocal.c X-source-ip: 208.248.82.254 Content-Type: text/plain; charset="US-ASCII" Content-Transfer-Encoding: 7bit Reply-To: djgpp-workers AT delorie DOT com Hello, I have integrated Brian's patches in my code. Also, I found a bug with setting separate parts of locale in single scope (f.e. setlocal(LC_ALL, "C,ru_RU.866,C")), made it possible to set locale without codepage (i.e. "ru_RU" instead of "ru_RU.866") and a bit improve speed by using memcpy instead of strcpy where possible. I'm not sure to which version I should apply diff, so here is the whole file at once. The what I want to put into libc more is regex patch, but I'm not sure about the steps to submit changes. May be we can start from the warning elimination and procede with locale support... -- Alexander Aganichev /* * setlocal.c: Set or read international environment * * Copyright (C) 2002, 2005 Alexander S. Aganichev * * This software may be used freely so long as this copyright notice is * left intact. There is no warranty on this software. */ #include #include #include #include #include #include #ifdef TEST #include #else #include #endif #include #include /* * Number of supported categories */ #define LC_CATEGORIES 5 /* * Maximum name length for locale */ #define LC_MAXNAMESIZE 16 /* * Buffer size for the defined locales */ #define LC_BUFSIZ (LC_MAXNAMESIZE * LC_CATEGORIES) /* * This variable contains the locales defined in the order LC_COLLATE, * LC_CTYPE, LC_MONETARY, LC_NUMERIC, LC_TIME separated by comma. If all * locales are the same, then locale specified only once. */ static char lc_buffer[LC_BUFSIZ]; static char lc_current[LC_CATEGORIES][LC_MAXNAMESIZE] = { "C", "C", "C", "C", "C" }; /* * This is what we can extract from country.sys for our purposes. */ static char currency_symbol[6] = ""; static char mon_decimal_point[2] = ""; static char mon_thousands_sep[2] = ""; static char decimal_point[2] = "."; static char thousands_sep[2] = ""; extern unsigned char __dj_collate_table[]; extern char __dj_date_format[]; extern char __dj_time_format[]; /* * Reset collate table to the C locale */ static int resetlocalecollate(void) { int i; for (i = 0; i < 256; i++) __dj_collate_table[i] = i; return 1; } /* * Reset ctype tables to the C locale */ static int resetlocalectype(void) { int i; /* ASCII portion always left unchanged */ for (i = 128; i < 256; i++) { __dj_ctype_tolower[i + 1] = i; __dj_ctype_toupper[i + 1] = i; __dj_ctype_flags[i + 1] = 0; } return 1; } /* * Reset monetary to the C locale * lcnv should be non-NULL */ static int resetlocalemonetary(void) { int honored = 0; struct lconv *lcnv = localeconv(); if(lcnv != NULL) { *currency_symbol = '\0'; lcnv->int_curr_symbol = lcnv->currency_symbol = currency_symbol; *mon_thousands_sep = '\0'; lcnv->mon_thousands_sep = mon_thousands_sep; *mon_decimal_point = '\0'; lcnv->mon_decimal_point = mon_decimal_point; /* lcnv->mon_grouping = ""; */ /* lcnv->negative_sign = ""; */ /* lcnv->positive_sign = ""; */ lcnv->int_frac_digits = lcnv->frac_digits = CHAR_MAX; lcnv->p_cs_precedes = lcnv->n_cs_precedes = CHAR_MAX; lcnv->p_sep_by_space = lcnv->n_sep_by_space = CHAR_MAX; /* lcnv->p_sign_posn = lcnv->n_sign_posn = CHAR_MAX; */ honored = 1; } return honored; } /* * Reset numeric to the C locale * lcnv should be non-NULL */ static int resetlocalenumeric(void) { int honored = 0; struct lconv *lcnv = localeconv(); if(lcnv != NULL) { *thousands_sep = '\0'; lcnv->thousands_sep = thousands_sep; strcpy(decimal_point, "."); lcnv->decimal_point = decimal_point; /* lcnv->grouping = ""; */ honored = 1; } return honored; } /* * Reset time strings to the C locale */ static int resetlocaletime(void) { strcpy(__dj_date_format, "%m/%d/%y"); strcpy(__dj_time_format, "%H:%M:%S"); return 1; } /* * Set collate table to the locale specified * regs->x.bx = code page * regs->x.dx = country ID * regs->x.es/di = segment/offset */ static int setlocalecollate(const char *locale __attribute__((unused)), int selector, __dpmi_regs *regs) { regs->h.ah = 0x65; regs->h.al = 0x06; regs->x.cx = 5; __dpmi_int(0x21, regs); if ((regs->x.flags & 1) || (regs->x.cx != 5)) return 0; else { unsigned int table = _farpeekw(selector, 3) * 16 + _farpeekw(selector, 1); int size = _farpeekw(_dos_ds, table); movedata(_dos_ds, table + 2, _my_ds(), (unsigned int) __dj_collate_table, size); return 1; } } /* * Set ctype table to the locale specified * regs->x.bx = code page * regs->x.dx = country ID * regs->x.es/di = segment/offset */ static int setlocalectype(const char *locale __attribute__((unused)), int selector, __dpmi_regs *regs) { int temp_flags; int i; regs->h.ah = 0x65; regs->h.al = 0x02; regs->x.cx = 5; __dpmi_int(0x21, regs); if ((regs->x.flags & 1) || (regs->x.cx != 5)) return 0; else { unsigned int table = _farpeekw(selector, 3) * 16 + _farpeekw(selector, 1); int size = _farpeekw(_dos_ds, table); movedata(_dos_ds, table + 2, _my_ds(), (unsigned int) &(__dj_ctype_toupper[128 + 1]), size); /* let's build lowercase table from uppercase... */ for (i = 0; i < size; i++) { int c = toupper(i + 128); if ((c != i + 128) && (c > 127)) __dj_ctype_tolower[c + 1] = i + 128; } for (i = 128; i < 256; i++) { /* * Actually isgraph(), ispunct() and isspace() will return wrong results * for some letters like 0xff in CP866 but we can't detect them reliably */ temp_flags = __dj_ISPRINT | __dj_ISGRAPH; if (tolower(i) != toupper(i)) { temp_flags |= __dj_ISALPHA | __dj_ISALNUM; if (i == toupper(i)) temp_flags |= __dj_ISUPPER; else temp_flags |= __dj_ISLOWER; } else temp_flags |= __dj_ISPUNCT; __dj_ctype_flags[i + 1] = temp_flags; } return 1; } } /* * Set monetary values to the locale specified */ static int setlocalemonetary(const char *locale, int selector, __dpmi_regs *regs __attribute__((unused))) { struct lconv *lcnv = localeconv(); if(lcnv == NULL) return 0; else { /* parse: de_DE_EURO.850 */ const char *p = strrchr(locale, '_'); if ((p != NULL) && !strnicmp(p + 1, "EURO", 4)) strcpy(currency_symbol, "EUR"); else movedata(selector, 9, _my_ds(), (unsigned) currency_symbol, 5); lcnv->int_curr_symbol = lcnv->currency_symbol = currency_symbol; movedata(selector, 14, _my_ds(), (unsigned) mon_thousands_sep, 2); lcnv->mon_thousands_sep = mon_thousands_sep; movedata(selector, 16, _my_ds(), (unsigned) mon_decimal_point, 2); lcnv->mon_decimal_point = mon_decimal_point; /* lcnv->mon_grouping = ""; */ /* lcnv->negative_sign = ""; */ /* lcnv->positive_sign = ""; */ lcnv->int_frac_digits = lcnv->frac_digits = _farpeekb(selector, 24); lcnv->p_cs_precedes = lcnv->n_cs_precedes = _farpeekb(selector, 23) & 1; lcnv->p_sep_by_space = lcnv->n_sep_by_space = _farpeekb(selector, 23) & 2; /* lcnv->p_sign_posn = lcnv->n_sign_posn = CHAR_MAX; */ return 1; } } /* * Set numeric values to the locale specified */ static int setlocalenumeric(const char *locale __attribute__((unused)), int selector, __dpmi_regs *regs __attribute__((unused))) { struct lconv *lcnv = localeconv(); if(lcnv == NULL) return 0; else { movedata(selector, 14, _my_ds(), (unsigned) thousands_sep, 2); lcnv->thousands_sep = thousands_sep; movedata(selector, 16, _my_ds(), (unsigned) decimal_point, 2); lcnv->decimal_point = decimal_point; /* lcnv->grouping = ""; */ return 1; } } /* * Set time strings to the locale specified */ static int setlocaletime(const char *locale __attribute__((unused)), int selector, __dpmi_regs *regs __attribute__((unused))) { switch (_farpeekw(selector, 7)) { case 0: default: strcpy(__dj_date_format, "%m/%d/%y"); break; case 1: strcpy(__dj_date_format, "%d/%m/%y"); break; case 2: strcpy(__dj_date_format, "%y/%m/%d"); break; } __dj_date_format[2] = __dj_date_format[5] = _farpeekb(selector, 18); if (_farpeekb(selector, 24) & 1) strcpy(__dj_time_format, "%H:%M:%S"); else strcpy(__dj_time_format, "%I:%M:%S %p"); __dj_time_format[2] = __dj_time_format[5] = _farpeekb(selector, 20); return 1; } static const struct _cat { int type; const char *env; int (*reset)(void); int (*set)(const char *locale, int selector, __dpmi_regs *regs); } cat[LC_CATEGORIES] = { { LC_COLLATE, "LC_COLLATE", resetlocalecollate, setlocalecollate }, { LC_CTYPE, "LC_CTYPE", resetlocalectype, setlocalectype }, { LC_MONETARY, "LC_MONETARY", resetlocalemonetary, setlocalemonetary }, { LC_NUMERIC, "LC_NUMERIC", resetlocalenumeric, setlocalenumeric }, { LC_TIME, "LC_TIME", resetlocaletime, setlocaletime } }; static const struct _loc2id { int id; const char *loc; int len; } loc2id[] = { { 43, "de_AT", 5 }, { 43, "de_AT_EURO", 10 }, { 41, "de_CH", 5 }, { 49, "de_DE", 5 }, { 49, "de_DE_EURO", 10 }, { 352, "de_LU", 5 }, { 352, "de_LU_EURO", 10 }, { 61, "en_AU", 5 }, { 32, "en_BE", 5 }, { 4, "en_CA", 5 }, { 44, "en_GB", 5 }, { 61, "en_IE", 5 }, { 61, "en_IE_EURO", 10 }, { 64, "en_NZ", 5 }, { 1, "en_US", 5 }, { 27, "en_ZA", 5 }, { 54, "es_AR", 5 }, { 591, "es_BO", 5 }, { 56, "es_CL", 5 }, { 57, "es_CO", 5 }, { 506, "es_CR", 5 }, { 809, "es_DO", 5 }, { 593, "es_EC", 5 }, { 34, "es_ES", 5 }, { 34, "es_ES_EURO", 10 }, { 502, "es_GT", 5 }, { 504, "es_HN", 5 }, { 52, "es_MX", 5 }, { 507, "es_PA", 5 }, { 51, "es_PE", 5 }, { 595, "es_PY", 5 }, { 503, "es_SV", 5 }, { 598, "es_UY", 5 }, { 58, "es_VE", 5 }, { 372, "et_ET", 5 }, { 358, "fi_FI", 5 }, { 358, "fi_FI_EURO", 10 }, { 32, "fr_BE", 5 }, { 32, "fr_BE_EURO", 10 }, { 2, "fr_CA", 5 }, { 41, "fr_CH", 5 }, { 352, "fr_LU", 5 }, { 352, "fr_LU_EURO", 10 }, { 33, "fr_FR", 5 }, { 33, "fr_FR_EURO", 10 }, { 30, "gr_GR", 5 }, { 30, "gr_GR_EURO", 10 }, { 972, "he_IL", 5 }, { 385, "hr_HR", 5 }, { 36, "hu_HU", 5 }, { 354, "is_IS", 5 }, { 41, "it_CH", 5 }, { 39, "it_IT", 5 }, { 39, "it_IT_EURO", 10 }, { 370, "lt_LT", 5 }, { 371, "lv_LV", 5 }, { 389, "mk_MK", 5 }, { 91, "mr_IN", 5 }, { 356, "mt_MT", 5 }, { 32, "nl_BE", 5 }, { 32, "nl_BE_EURO", 10 }, { 31, "nl_NL", 5 }, { 31, "nl_NL_EURO", 10 }, { 47, "no_NO", 5 }, { 48, "pl_PL", 5 }, { 55, "pt_BR", 5 }, { 351, "pt_PT", 5 }, { 351, "pt_PT_EURO", 10 }, { 40, "ro_RO", 5 }, { 7, "ru_RU", 5 }, { 38, "sh_YU", 5 }, { 42, "sk_SK", 5 }, { 386, "sl_SI", 5 }, { 355, "sq_AL", 5 }, { 381, "sr_YU", 5 }, { 358, "sv_FI", 5 }, { 46, "sv_SE", 5 }, { 91, "ta_IN", 5 }, { 200, "th_TH", 5 }, { 90, "tr_TR", 5 }, { 804, "uk_UA", 5 }, { 84, "vi_VN", 5 } }; /* * Set or read international environment */ char * setlocale(int category, const char *locale) { int honored = 1; int i; if (locale != NULL) { int segment = -1, selector = -1; __dpmi_regs regs; char buf[LC_MAXNAMESIZE]; char *p1, *p2; strncpy(lc_buffer, locale, LC_BUFSIZ); lc_buffer[LC_BUFSIZ - 1] = '\0'; p1 = lc_buffer - 1; p2 = lc_buffer; for (i = 0; i < LC_CATEGORIES; i++) { locale = p2; p1 = strchr (p2, ','); if (p1 == NULL) { p1 = p2; } else { *p1 = '\0'; p2 = p1 + 1; } if ((category == LC_ALL) || (cat[i].type == category)) { if (*locale == '\0') { const char *env; if ((env = getenv (cat[i].env)) != NULL) { locale = env; } else if ((env = getenv ("LC_ALL")) != NULL) { locale = env; } else if ((env = getenv ("LANG")) != NULL) { locale = env; } } if (!stricmp(locale, "C") || !stricmp(locale, "POSIX")) { if (cat[i].reset() == 0) { honored = 0; continue; } } else { int CID, CCP; size_t j; /* Allocate DOS memory */ if (segment == -1) { if ((segment = __dpmi_allocate_dos_memory(3, &selector)) == -1) { honored = 0; continue; } } /* Now try to find out the country/codepage */ CID = 0xffff; CCP = 0xffff; if (*locale != '\0') { size_t len; const char *p = strchr(locale, '.'); if (p == NULL) p = locale + strlen(locale); len = p - locale; for (j = 0; j < sizeof(loc2id) / sizeof(*loc2id); j++) if (!strncmp(locale, loc2id[j].loc, len >= loc2id[j].len ? len : loc2id[j].len)) { CID = loc2id[j].id; break; } if (*p == '.') CCP = atoi(p + 1); /* User requested the country/codepage we doesn't know about */ if ((CID == 0xffff) || ((*p == '.') && (CCP == 0xffff))) { honored = 0; continue; } } regs.h.ah = 0x65; regs.h.al = 0x01; regs.x.bx = CCP; regs.x.dx = CID; regs.x.cx = 41; regs.x.es = segment; regs.x.di = 0; __dpmi_int(0x21, ®s); if ((regs.x.flags & 1) || (regs.x.cx != 41)) { honored = 0; continue; } if (*locale == '\0') { CID = _farpeekw(selector, 3); CCP = _farpeekw(selector, 5); locale = "??_??"; for (j = 0; j < sizeof(loc2id) / sizeof(*loc2id); j++) if (loc2id[j].id == CID) { locale = loc2id[j].loc; break; } sprintf(buf, "%s.%u", locale, CCP); locale = buf; } /* regs.x.bx, regs.x.dx, regs.x.es/di are preserved by DOS */ if (cat[i].set(locale, selector, ®s) == 0) { honored = 0; continue; } } strncpy(lc_current[i], locale, LC_MAXNAMESIZE); lc_current[i][LC_MAXNAMESIZE - 1] = '\0'; } } if (segment != -1) __dpmi_free_dos_memory(selector); } if (honored) { if (category != LC_ALL) { for (i = 0; i < LC_CATEGORIES; i++) if (cat[i].type == category) return lc_current[i]; return NULL; } if (!stricmp(lc_current[0], lc_current[1]) && !stricmp(lc_current[1], lc_current[2]) && !stricmp(lc_current[2], lc_current[3]) && !stricmp(lc_current[3], lc_current[4])) { return lc_current[0]; } else { char *p; p = lc_buffer; for (i = 0; i < LC_CATEGORIES; i++) { size_t len = strlen(lc_current[i]); memcpy(p, lc_current[i], len + 1); p += len; if (i != (LC_CATEGORIES - 1)) *p++ = ','; } return lc_buffer; } } else return NULL; } #ifdef TEST extern unsigned char __dj_collate_table[256]; extern char __dj_date_format[]; extern char __dj_time_format[]; int main(int ac, char *av[]) { int i; const char *loc = (ac == 1) ? "" : av[1]; char *lc; lc = setlocale(LC_ALL, loc); lc = setlocale(LC_ALL, NULL); printf("Locale: %s\n", lc ? lc : "not detected"); for (i = 32; i < 256; i++) printf("%c%c%c|", (char) i, tolower(i), toupper(i)); printf("\n"); for (i = 0; i < 256; i++) printf("%02xh ", __dj_collate_table[i]); printf("\n%f\n%s %s\n", 1000456.23, __dj_date_format, __dj_time_format); return 0; } #endif