MULTI_STOPLIST

I've created a custom STOPLIST with ...
-- Create a MULTI_STOPLIST named 'multistop' and register each stopword
-- together with the language it belongs to. Several case variants of the
-- same word are listed explicitly.
begin
ctx_ddl.create_stoplist('multistop', 'MULTI_STOPLIST');
-- German stopwords
ctx_ddl.add_stopword('multistop', 'ab','german');
ctx_ddl.add_stopword('multistop', 'Ab','german');
ctx_ddl.add_stopword('multistop', 'AB','german');
ctx_ddl.add_stopword('multistop', 'dann','german');
ctx_ddl.add_stopword('multistop', 'Dann','german');
-- English stopwords
ctx_ddl.add_stopword('multistop', 'THIS','english');
ctx_ddl.add_stopword('multistop', 'whose','english');
ctx_ddl.add_stopword('multistop', 'Whose','english');
ctx_ddl.add_stopword('multistop', 'WHOSE','english');
ctx_ddl.add_stopword('multistop', 'and','english');
ctx_ddl.add_stopword('multistop', 'And','english');
ctx_ddl.add_stopword('multistop', 'AND','english');
end;
/
-- The slash above terminates and runs the anonymous block in SQL*Plus;
-- without it the exec commands below would be read as part of the block.

-- One BASIC_LEXER preference per language; only the German lexer gets
-- extra attributes.
exec ctx_ddl.create_preference('english_lexer','basic_lexer');
exec ctx_ddl.create_preference('german_lexer','basic_lexer');
exec ctx_ddl.set_attribute('german_lexer','composite','german');
exec ctx_ddl.set_attribute('german_lexer','mixed_case','yes');
-- NOTE(review): theme indexing with theme_language 'german' presumably needs
-- a German knowledge base to be available -- confirm before relying on it.
exec ctx_ddl.set_attribute('german_lexer','index_themes','yes');
exec ctx_ddl.set_attribute('german_lexer','theme_language','german');
exec ctx_ddl.set_attribute('german_lexer','alternate_spelling','german');
-- Combine both lexers in a MULTI_LEXER: german_lexer is the default
-- sub-lexer, english_lexer is registered for 'english' with 'eng' passed
-- as the fourth argument.
exec ctx_ddl.create_preference('global_lexer', 'multi_lexer');
exec ctx_ddl.add_sub_lexer('global_lexer','default','german_lexer');
exec ctx_ddl.add_sub_lexer('global_lexer','english','english_lexer','eng');
-- CONTEXT index on documents(text_file) that uses the multi-lexer, takes the
-- language from column sprache_id and applies the 'multistop' stoplist.
create index doc_idx on documents(text_file) indextype is ctxsys.context
parameters ('DATASTORE CTXSYS.DIRECT_DATASTORE lexer global_lexer language column sprache_id STOPLIST multistop filter ctxsys.auto_filter');
After setting it as the STOPLIST parameter for my index and creating the index, I use the procedure CTX_DOC.TOKENS() to show the tokens of a document, and the output also includes terms from the STOPLIST. What's wrong? It seems the STOPLIST isn't in use.

Although the documentation says that ctx_doc.tokens does not return stopwords, that seems to be false. In the example below, although ctx_doc.tokens returns the stopwords, they are not in the token_text column of the dr$doc_idx$i table.
SCOTT@orcl_11g>
SCOTT@orcl_11g> begin
  2    ctx_ddl.create_stoplist('multistop', 'MULTI_STOPLIST');
  3    ctx_ddl.add_stopword('multistop', 'ab','german');
  4    ctx_ddl.add_stopword('multistop', 'Ab','german');
  5    ctx_ddl.add_stopword('multistop', 'AB','german');
  6    ctx_ddl.add_stopword('multistop', 'dann','german');
  7    ctx_ddl.add_stopword('multistop', 'Dann','german');
  8    ctx_ddl.add_stopword('multistop', 'this','english');
  9    ctx_ddl.add_stopword('multistop', 'This','english');
10    ctx_ddl.add_stopword('multistop', 'THIS','english');
11    ctx_ddl.add_stopword('multistop', 'whose','english');
12    ctx_ddl.add_stopword('multistop', 'Whose','english');
13    ctx_ddl.add_stopword('multistop', 'WHOSE','english');
14    ctx_ddl.add_stopword('multistop', 'and','english');
15    ctx_ddl.add_stopword('multistop', 'And','english');
16    ctx_ddl.add_stopword('multistop', 'AND','english');
17  end;
18  /
PL/SQL procedure successfully completed.
SCOTT@orcl_11g> begin
  2    ctx_ddl.create_preference('english_lexer','basic_lexer');
  3    ctx_ddl.create_preference('german_lexer','basic_lexer');
  4    ctx_ddl.set_attribute('german_lexer','composite','german');
  5    ctx_ddl.set_attribute('german_lexer','mixed_case','yes');
  6    ctx_ddl.set_attribute('german_lexer','alternate_spelling','german');
  7    ctx_ddl.create_preference('global_lexer', 'multi_lexer');
  8    ctx_ddl.add_sub_lexer('global_lexer','default','german_lexer' );
  9    ctx_ddl.add_sub_lexer('global_lexer','english','english_lexer', 'eng');
10    ctx_ddl.add_sub_lexer('global_lexer','german','german_lexer', 'ger');
11  end;
12  /
PL/SQL procedure successfully completed.
SCOTT@orcl_11g> create table documents (id number primary key, text_file varchar2 (60), sprache_id varchar2 (3))
  2  /
Table created.
SCOTT@orcl_11g> insert into documents values (1, 'testing this whose and whatever', 'eng')
  2  /
1 row created.
SCOTT@orcl_11g> insert into documents values (2, 'Ich habe nichts zu sagen ab dann.', 'ger')
  2  /
1 row created.
SCOTT@orcl_11g> create index doc_idx on documents(text_file) indextype is ctxsys.context
  2  parameters
  3    ('DATASTORE       CTXSYS.DIRECT_DATASTORE
  4        lexer            global_lexer
  5        language column  sprache_id
  6        STOPLIST       multistop
  7        filter        ctxsys.auto_filter')
  8  /
Index created.
SCOTT@orcl_11g> declare
  2    the_tokens ctx_doc.token_tab;
  3  begin
  4    ctx_doc.set_key_type ('PRIMARY_KEY');
  5    ctx_doc.tokens('doc_idx','1',the_tokens);
  6    for i in 1..the_tokens.count loop
  7        dbms_output.put_line(the_tokens(i).token);
  8    end loop;
  9    ctx_doc.tokens('doc_idx','2',the_tokens);
10    for i in 1..the_tokens.count loop
11        dbms_output.put_line(the_tokens(i).token);
12    end loop;
13  end;
14  /
TESTING
THIS
WHOSE
AND
WHATEVER
Ich
habe
nichts
zu
sagen
ab
dann
PL/SQL procedure successfully completed.
SCOTT@orcl_11g> select token_text from dr$doc_idx$i
  2  /
TOKEN_TEXT
Ich
TESTING
WHATEVER
habe
nichts
sagen
zu
7 rows selected.
SCOTT@orcl_11g>

Similar Messages

  • Nls_language to be set for english stop words to work,10gR2 on Linux

    Hi,
    When we search for a phrase using contains and the phrase has a stop word in it, the stop word is identified by Oracle depending on the session language.
    When I set nls_language for the session to AMERICAN, the stop word is not detected, and when I set the language to 'ENGLISH', it is detected.
    Is this because the English referred to in the full-text configuration corresponds to 'ENGLISH' and not 'AMERICAN'?
    I guess it should refer to both American and English.
    It would be nice if anyone can clarify this ASAP.
    Thanks a lot in advance.

    This is what I am trying to do while creating the index,
    -- Setup script for the multi-language text index on text_search.
    -- Every anonymous PL/SQL block is followed by a slash on its own line so
    -- that SQL*Plus terminates and runs the block before reading the next
    -- statement.

    -- Best-effort drop of the table; "when others" swallows every error,
    -- including the one raised when the table does not exist yet.
    begin
    execute immediate 'drop table text_search';
    exception
    when others then
    null;
    end;
    /
    -- Sample table: one document per row plus its language code.
    create table text_search (content clob, lang varchar2(3) );
    insert into text_search (content, lang) values ('Sun rises in the east','eng');
    insert into text_search (content, lang) values ('Sun cet evening','fre');
    commit;
    -- Best-effort drops of the lexer preferences from a previous run
    -- (all errors are ignored).
    begin
    ctx_ddl.drop_preference('global_lexer');
    exception
    when others then
    null;
    end;
    /
    begin
    ctx_ddl.drop_preference('english_lexer');
    exception
    when others then
    null;
    end;
    /
    begin
    ctx_ddl.drop_preference('french_lexer');
    exception
    when others then
    null;
    end;
    /
    begin
    ctx_ddl.drop_preference('german_lexer');
    exception
    when others then
    null;
    end;
    /
    -- One BASIC_LEXER preference per language; only the German lexer gets
    -- extra attributes.
    begin
    ctx_ddl.create_preference('english_lexer','basic_lexer');
    ctx_ddl.create_preference('french_lexer','basic_lexer');
    ctx_ddl.create_preference('german_lexer','basic_lexer');
    ctx_ddl.set_attribute('german_lexer','composite','german');
    ctx_ddl.set_attribute('german_lexer','mixed_case','yes');
    ctx_ddl.set_attribute('german_lexer','alternate_spelling','german');
    end;
    /
    -- MULTI_LEXER that registers a sub-lexer per language ('eng', 'fre' and
    -- 'ger' are passed as the fourth argument) and uses english_lexer as
    -- the default.
    begin
    ctx_ddl.create_preference('global_lexer', 'multi_lexer');
    ctx_ddl.add_sub_lexer('global_lexer','english','english_lexer','eng');
    ctx_ddl.add_sub_lexer('global_lexer','french','french_lexer','fre');
    ctx_ddl.add_sub_lexer('global_lexer','german','german_lexer','ger');
    ctx_ddl.add_sub_lexer('global_lexer','default','english_lexer');
    end;
    /
    -- Best-effort drop of the wordlist preference (all errors are ignored).
    begin
    ctx_ddl.drop_preference('mywordlist');
    exception
    when others then
    null;
    end;
    /
    -- BASIC_WORDLIST with its STEMMER attribute set to ENGLISH.
    begin
    ctx_ddl.create_preference('mywordlist', 'BASIC_WORDLIST');
    ctx_ddl.set_attribute('mywordlist','STEMMER','ENGLISH');
    end;
    /
    -- Best-effort drop of the stoplist (all errors are ignored).
    begin
    ctx_ddl.drop_stoplist('multistop');
    exception
    when others then
    null;
    end;
    /
    -- MULTI_STOPLIST: each stopword is registered together with its language.
    begin
    ctx_ddl.create_stoplist('multistop', 'MULTI_STOPLIST');
    ctx_ddl.add_stopword('multistop', 'cet','french');
    ctx_ddl.add_stopword('multistop', 'the','english');
    ctx_ddl.add_stopword('multistop', 'for','english');
    ctx_ddl.add_stopword('multistop', 'all','english');
    ctx_ddl.add_stopword('multistop', 'in','english');
    end;
    /
    -- Best-effort drop of the index (all errors are ignored).
    begin
    execute immediate 'drop index my_text_search_idx';
    exception
    when others then
    null;
    end;
    /
    -- CONTEXT index on text_search(content) that uses the multi-lexer, the
    -- wordlist and the stoplist created above, with lang as language column.
    CREATE INDEX my_text_search_idx ON text_search(content)
    INDEXTYPE IS CTXSYS.CONTEXT
    parameters ('datastore ctxsys.direct_datastore filter ctxsys.null_filter lexer global_lexer wordlist mywordlist language column lang stoplist multistop');
    while searching:
    NLS_LANGUAGE For the session is 'AMERICAN'.
    I run the query,
    select * from text_search where contains(content,'$"Sun rises in the east"') >0;
    gives no result.
    I change the session language as below and the query gives the result.
    alter session set nls_language = 'english' ;
    Same is happening when I tried for different languages.
    Please note that we need to maintain a custom stop list containing stop words for multiple languages.

  • About Multi_language features of Oracle Text.

    I have a customer who has to store into one table docs in different languages and
    use contains index to perform some text search.
    He would like to use the multi_language feature of Oracle Text.
    The database we are using is Oracle 10gR2
    We create a table with doc and language column, and then we have to create the context index.
    In the documentation I found some information about how to set different lexers (MULTI_LEXER) for languages that need different lexers,
    and different stoplists (MULTI_STOPLIST) for the stop words of different languages,
    but I don't understand whether it
    is possible to use the stemmer features for different languages, and whether there are other features that I can set to use the multi-language properties.
    Thank you in advance
    Paola

    According to the online documentation: "The Oracle Text stemmer, licensed from Xerox Corporation's XSoft Division, supports the following languages with the BASIC_LEXER: English, French, Spanish, Italian, German, and Dutch. Japanese stemming is supported with the JAPANESE_LEXER."
    Please see the demonstration below. Also, if you are using 10g, you can specify the language in the query, instead of changing the language for the session. 10g also has a world_lexer.
    scott@ORA92> CREATE TABLE your_table
      2    (id         NUMBER,
      3       doc         CLOB,
      4       lang         VARCHAR2 (3),
      5       CONSTRAINT  your_table_id_pk PRIMARY KEY (id))
      6  /
    Table created.
    scott@ORA92> INSERT ALL
      2  INTO your_table VALUES (1, 'They say only the good die young.', 'eng')
      3  INTO your_table VALUES (2, 'The dogs like the cats.',          'eng')
      4  INTO your_table VALUES (3, 'cats and dogs',               'eng')
      5  INTO your_table VALUES (4, 'cat and dog',                    'eng')
      6  INTO your_table VALUES (5, 'chats et chiens',               'fre')
      7  INTO your_table VALUES (6, 'chat et chien',               'fre')
      8  INTO your_table VALUES (7, 'Die Hunde mögen die Katzen',          'ger')
      9  INTO your_table VALUES (8, 'Katzen und Hunde',               'ger')
    10  INTO your_table VALUES (9, 'Katze und Hund',               'ger')
    11  SELECT * FROM DUAL
    12  /
    9 rows created.
    scott@ORA92> BEGIN
      2    ctx_ddl.create_preference ('english_lexer','basic_lexer');
      3    ctx_ddl.set_attribute      ('english_lexer','index_themes','yes');
      4    ctx_ddl.set_attribute      ('english_lexer','theme_language','english');
      5 
      6    ctx_ddl.create_preference ('french_lexer','basic_lexer');
      7    ctx_ddl.set_attribute      ('french_lexer','index_themes','yes');
      8    ctx_ddl.set_attribute      ('french_lexer','theme_language','french');
      9 
    10    ctx_ddl.create_preference ('german_lexer','basic_lexer');
    11    ctx_ddl.set_attribute      ('german_lexer','composite','german');
    12    ctx_ddl.set_attribute      ('german_lexer','alternate_spelling','german');
    13 
    14    CTX_DDL.CREATE_PREFERENCE ('global_lexer', 'MULTI_LEXER');
    15    ctx_ddl.add_sub_lexer      ('global_lexer','english','english_lexer', 'eng');
    16    ctx_ddl.add_sub_lexer      ('global_lexer','french','french_lexer', 'fre');
    17    ctx_ddl.add_sub_lexer      ('global_lexer','german','german_lexer','ger');
    18    ctx_ddl.add_sub_lexer      ('global_lexer','default','english_lexer');
    19 
    20    CTX_DDL.CREATE_STOPLIST ('global_stoplist', 'MULTI_STOPLIST');
    21    CTX_DDL.ADD_STOPWORD    ('global_stoplist', 'and', 'english');
    22    CTX_DDL.ADD_STOPWORD    ('global_stoplist', 'und', 'german');
    23    CTX_DDL.ADD_STOPWORD    ('global_stoplist', 'et', 'french');
    24    CTX_DDL.ADD_STOPWORD    ('global_stoplist', 'the', 'ALL');
    25    CTX_DDL.ADD_STOPWORD    ('global_stoplist', 'die', 'german');
    26  END;
    27  /
    PL/SQL procedure successfully completed.
    scott@ORA92> CREATE INDEX your_table_doc_idx
      2  ON your_table (doc)
      3  INDEXTYPE IS CTXSYS.CONTEXT
      4  PARAMETERS
      5    ('LEXER           global_lexer
      6        LANGUAGE COLUMN lang
      7        STOPLIST      global_stoplist')
      8  /
    Index created.
    scott@ORA92> ALTER SESSION SET NLS_LANGUAGE = 'AMERICAN'
      2  /
    Session altered.
    scott@ORA92> SELECT * FROM your_table WHERE CONTAINS (doc, 'die') > 0
      2  /
            ID DOC                                                                              LAN
             1 They say only the good die young.                                                eng
    scott@ORA92> SELECT * FROM your_table WHERE CONTAINS (doc, 'cat AND dog') > 0
      2  /
            ID DOC                                                                              LAN
             4 cat and dog                                                                      eng
    scott@ORA92> SELECT * FROM your_table WHERE  CONTAINS (doc, '$cat AND $dog') > 0
      2  /
            ID DOC                                                                              LAN
             4 cat and dog                                                                      eng
             3 cats and dogs                                                                    eng
             2 The dogs like the cats.                                                          eng
    scott@ORA92> ALTER SESSION SET NLS_LANGUAGE = 'FRENCH'
      2  /
    Session altered.
    scott@ORA92> SELECT * FROM your_table WHERE CONTAINS (doc, 'chat AND chien') > 0
      2  /
            ID DOC                                                                              LAN
             6 chat et chien                                                                    fre
    scott@ORA92> SELECT * FROM your_table WHERE  CONTAINS (doc, '$chat AND $chien') > 0
      2  /
            ID DOC                                                                              LAN
             6 chat et chien                                                                    fre
             5 chats et chiens                                                                  fre
    scott@ORA92> ALTER SESSION SET NLS_LANGUAGE = 'GERMAN'
      2  /
    Session altered.
    scott@ORA92> SELECT * FROM your_table WHERE CONTAINS (doc, 'Die') > 0
      2  /
    no rows selected
    scott@ORA92> SELECT * FROM your_table WHERE CONTAINS (doc, 'Katze AND Hund') > 0
      2  /
            ID DOC                                                                              LAN
             9 Katze und Hund                                                                   ger
    Message was edited by:
    Barbara Boehmer

Maybe you are looking for

  • 24" display questions

    Hi, im close to placing an order for a mini (I use a 24" Imac in work) and want a 24" display for my work at home. I have seen 2 possible displays Dell 2408 (16:10, 1920 x 1200) and the dell 2409 (16:9, 1920 x 1080). My questions are: 1. can the I ma

  • Creating Multiple POs from the shopping cart

    Hi I have a requirement to create multiple POs from shopping cart. we are in SRM 7.0 I have implemented the BADI BBP_SC_TRANSFER_BE GROUP_PO method. modified group_1 field with 1 and Doc_type with custom document type and udpate the change parameter

  • How to get XML file using servlets that XI sent to my J2EE appl?

    Hi All! I have a scenario like XI sends xml file to j2ee application. In my J2EE application my servlet receives this xml. Will the xml file be in my HTTPServletRequest object? if so how to get that file from Request object. Please help me its urgent

  • Posting Period is locked at the time of CO11

    I am facing an issue in CO11: the system is giving a message that the posting period is closed. I have checked that the FI & MM periods are open. What could be the reason... Vivek Moderator: Please, avoid asking basic questions

  • How to retrieve Data from SAP BW to  SQL

    Hi All, I am looking for a way to retrieve data from SAP BW to SQL. Could you please give me some indications or a document on how to implement it? Thanks in advance Cheers Gilo