Skip to content

Commit

Permalink
Mark preprocessor conditional and string operands
Browse files Browse the repository at this point in the history
Mark, through an EC attribute, macros that are used in C preprocessor
conditionals (#if, #ifdef, defined()) and in C preprocessor string
processing (concatenation and stringization). Such object-like macros
cannot be converted into C constants (constant objects or enum values).
The EC attributes that are set can be used for determining which
object-like macros can be refactored into C-proper elements.
  • Loading branch information
dspinellis committed Nov 30, 2024
1 parent 9abdc91 commit 5bb6d05
Show file tree
Hide file tree
Showing 63 changed files with 14,051 additions and 13,869 deletions.
6 changes: 5 additions & 1 deletion src/attr.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* (C) Copyright 2002-2015 Diomidis Spinellis
* (C) Copyright 2002-2024 Diomidis Spinellis
*
* This file is part of CScout.
*
Expand Down Expand Up @@ -64,6 +64,8 @@ string Attributes::attribute_names[] = {
"Macro",
"Undefined macro",
"Macro argument",
"Used in preprocessor constant",
"Value used as preprocessor string operand",

"File scope",
"Project scope",
Expand All @@ -90,6 +92,8 @@ string Attributes::attribute_short_names[] = {
"macro",
"umacro",
"macroarg",
"cppconst",
"cppstrval",

"fscope",
"pscope",
Expand Down
9 changes: 8 additions & 1 deletion src/attr.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* (C) Copyright 2002-2015 Diomidis Spinellis
* (C) Copyright 2002-2024 Diomidis Spinellis
*
* This file is part of CScout.
*
Expand Down Expand Up @@ -32,10 +32,13 @@ using namespace std;

// Attributes that can be set for an EC
// Keep in sync with attribute_names[] and attribute_short_names[]
// Consider updating workdb_schema, insert_eclass
enum e_attribute {
is_declared_unused, // Declared with __unused__ attribute
is_macro_token, // Identifier stored in a macro
// Used to determine macro nesting

// User-visible attributes start here
is_readonly, // Read-only; true if any member
// comes from an ro file
// The four C namespaces
Expand All @@ -47,6 +50,10 @@ enum e_attribute {
is_macro, // Name of an object or function-like macro
is_undefined_macro, // Macro (heuristic: ifdef, defined)
is_macro_arg, // Macro argument
is_cpp_const, // Used to derive a preprocessor constant
// used in #if, #include, defined()
is_cpp_str_val, // Macro's value is used as a string (pasting
// or stringization) in the preprocessor
// The following are valid if is_ordinary is true:
is_cscope, // Compilation-unit (file) scoped
// identifier (static)
Expand Down
35 changes: 26 additions & 9 deletions src/macro.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,7 @@ stringize(const PtokenSequence& ts)
bool seen_space = true; // To delete leading spaces

for (pi = ts.begin(); pi != ts.end(); pi++) {
pi->set_cpp_str_val();
switch ((*pi).get_code()) {
case '\n':
case SPACE:
Expand Down Expand Up @@ -363,19 +364,26 @@ Macro::Macro( const Ptoken& name, bool id, bool isfun, bool isimmutable) :
mcall = NULL; // To avoid nasty surprises
}

static PtokenSequence subst(const Macro &m, dequePtoken is, const mapArgval &args, HideSet hs, bool skip_defined, const Macro *caller);
static PtokenSequence subst(const Macro &m, dequePtoken is, const mapArgval &args, HideSet hs, bool skip_defined, Macro::CalledContext context, const Macro *caller);
static PtokenSequence glue(PtokenSequence ls, PtokenSequence rs);
static bool fill_in(PtokenSequence &ts, bool get_more, PtokenSequence &removed);

/*
* Expand a token sequence
* If skip_defined is true then the defined() keyword is not processed
* The caller is used for registering invocations from one macro to another
* This is an implementation of Dave Prosser's algorithm, listed in
* X3J11/86-196
* X3J11/86-196 and in https://www.spinellis.gr/blog/20060626/.
* If token_source is get_more, then more tokens can be fetched.
* If defined_handling is skip then the defined() keyword is not processed
* If context denotes preprocessor then is_cpp_const attribute is set
* for identifiers.
* The caller is used for registering invocations from one macro to another.
*/
PtokenSequence
macro_expand(PtokenSequence ts, Macro::TokenSourceOption token_source, Macro::DefinedHandlingOption defined_handling, const Macro *caller)
macro_expand(PtokenSequence ts,
Macro::TokenSourceOption token_source,
Macro::DefinedHandlingOption defined_handling,
Macro::CalledContext context,
const Macro *caller)
{
PtokenSequence r; // Return value
auto ts_size = ts.size();
Expand All @@ -399,6 +407,11 @@ macro_expand(PtokenSequence ts, Macro::TokenSourceOption token_source, Macro::De
continue;
}

// Mark the identifier as used as a preprocessor constant
if (context == Macro::CalledContext::process_include
|| context == Macro::CalledContext::process_if)
head.set_ec_attribute(is_cpp_const);

const string name = head.get_val();
mapMacro::const_iterator mi(Pdtoken::macros_find(name));
if (!Pdtoken::macro_is_defined(mi)) {
Expand All @@ -422,7 +435,7 @@ macro_expand(PtokenSequence ts, Macro::TokenSourceOption token_source, Macro::De
Token::unify((*mi).second.name_token, head);
HideSet hs(head.get_hideset());
hs.insert(m.get_name_token());
PtokenSequence s(subst(m, m.value, mapArgval(), hs, defined_handling == Macro::DefinedHandlingOption::skip, caller));
PtokenSequence s(subst(m, m.value, mapArgval(), hs, defined_handling == Macro::DefinedHandlingOption::skip, context, caller));
ts.splice(ts.begin(), s);
caller = &m;
} else if (fill_in(ts, token_source == Macro::TokenSourceOption::get_more, removed_spaces) && ts.front().get_code() == '(') {
Expand All @@ -447,7 +460,7 @@ macro_expand(PtokenSequence ts, Macro::TokenSourceOption token_source, Macro::De
close.get_hideset().begin(), close.get_hideset().end(),
inserter(hs, hs.begin()));
hs.insert(m.get_name_token());
PtokenSequence s(subst(m, m.value, args, hs, defined_handling == Macro::DefinedHandlingOption::skip, caller));
PtokenSequence s(subst(m, m.value, args, hs, defined_handling == Macro::DefinedHandlingOption::skip, context, caller));
ts.splice(ts.begin(), s);
caller = &m;
} else {
Expand Down Expand Up @@ -484,7 +497,7 @@ find_nonspace(dequePtoken::iterator pos, dequePtoken::iterator end)
* hide set added to it, before getting returned.
*/
static PtokenSequence
subst(const Macro &m, dequePtoken is, const mapArgval &args, HideSet hs, bool skip_defined, const Macro *caller)
subst(const Macro &m, dequePtoken is, const mapArgval &args, HideSet hs, bool skip_defined, Macro::CalledContext context, const Macro *caller)
{
PtokenSequence os; // output sequence

Expand Down Expand Up @@ -561,7 +574,7 @@ subst(const Macro &m, dequePtoken is, const mapArgval &args, HideSet hs, bool sk
if ((ai = find_formal_argument(args, head)) == args.end())
break;
// Otherwise expand head
PtokenSequence expanded(macro_expand(ai->second, Macro::TokenSourceOption::use_supplied, skip_defined ? Macro::DefinedHandlingOption::skip : Macro::DefinedHandlingOption::process, caller));
PtokenSequence expanded(macro_expand(ai->second, Macro::TokenSourceOption::use_supplied, skip_defined ? Macro::DefinedHandlingOption::skip : Macro::DefinedHandlingOption::process, context, caller));
os.splice(os.end(), expanded);
continue;
}
Expand All @@ -588,15 +601,19 @@ glue(PtokenSequence ls, PtokenSequence rs)
rs.pop_front();
if (ls.empty() && rs.empty())
return (ls);

// Glue ls.back() with rs.front()
Tchar::clear();
if (!ls.empty()) {
if (DP()) cout << "glue LS: " << ls.back() << endl;
Tchar::push_input(ls.back());
ls.back().set_cpp_str_val();
ls.pop_back();
}
if (!rs.empty()) {
if (DP()) cout << "glue RS: " << rs.front() << endl;
Tchar::push_input(rs.front());
rs.front().set_cpp_str_val();
rs.pop_front();
}
Tchar::rewind_input();
Expand Down
8 changes: 5 additions & 3 deletions src/macro.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,13 @@ typedef map<Tokid, MCall *> mapMacroBody;
// A macro definition
class Macro {
public:
// Typed options to the macro_expand function
// Typed options and arguments to the macro_expand function
// Get more tokens or only use the supplied ones
enum class TokenSourceOption { get_more, use_supplied };
// Process the defined() function or skip its processing
enum class DefinedHandlingOption { process, skip };
// Context in which macro_expand is called
enum class CalledContext { process_c, process_if, process_include };
private:
Ptoken name_token; // Name (used for unification)
bool is_function; // True if it is a function-macro
Expand Down Expand Up @@ -94,8 +96,8 @@ class Macro {
// Print it (for debugging)
friend ostream& operator<<(ostream& o,const Macro &m);

friend PtokenSequence macro_expand(PtokenSequence ts, Macro::TokenSourceOption token_source, Macro::DefinedHandlingOption defined_handling, const Macro *caller);
friend PtokenSequence macro_expand(PtokenSequence ts, Macro::TokenSourceOption token_source, Macro::DefinedHandlingOption defined_handling, Macro::CalledContext context, const Macro *caller);
};

PtokenSequence macro_expand(PtokenSequence ts, Macro::TokenSourceOption token_source, Macro::DefinedHandlingOption defined_handling, const Macro *caller = NULL);
PtokenSequence macro_expand(PtokenSequence ts, Macro::TokenSourceOption token_source, Macro::DefinedHandlingOption defined_handling, Macro::CalledContext context, const Macro *caller = NULL);
#endif // MACRO
14 changes: 11 additions & 3 deletions src/pdtoken.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ Pdtoken::getnext_expand()
break;
}
expand.push_front(t);
expand = macro_expand(expand, Macro::TokenSourceOption::get_more, Macro::DefinedHandlingOption::process);
expand = macro_expand(expand, Macro::TokenSourceOption::get_more, Macro::DefinedHandlingOption::process, Macro::CalledContext::process_c);
goto expand_get;
// FALLTHROUGH
default:
Expand Down Expand Up @@ -397,6 +397,10 @@ process_defined()
} else
last = arg;
last++;

// Mark the identifier as used as a preprocessor constant
arg->set_ec_attribute(is_cpp_const);

// We are about to erase it
string val = (*arg).get_val();
if (DP()) cout << "val:" << val << "\n";
Expand Down Expand Up @@ -446,7 +450,7 @@ eval()
}

// Macro replace, skipping identifiers for defined operator
eval_tokens = macro_expand(eval_tokens, Macro::TokenSourceOption::use_supplied, Macro::DefinedHandlingOption::skip);
eval_tokens = macro_expand(eval_tokens, Macro::TokenSourceOption::use_supplied, Macro::DefinedHandlingOption::skip, Macro::CalledContext::process_if);

if (DP()) {
cout << "Tokens after macro replace:\n";
Expand Down Expand Up @@ -539,6 +543,10 @@ Pdtoken::process_ifdef(bool isndef)
* directive is not a legal identifier
*/
Error::error(E_WARN, "#ifdef argument is not an identifier");

// Mark the identifier as used as a preprocessor constant
t.set_ec_attribute(is_cpp_const);

mapMacro::const_iterator i = macros.find(t.get_val());
if (i == macros.end())
// Heuristic; assume macro, even if it is not defined
Expand Down Expand Up @@ -677,7 +685,7 @@ Pdtoken::process_include(bool next)
if (f.get_code() != PATHFNAME && f.get_code() != ABSFNAME) {
// Need to macro process
// 1. Macro replace
tokens = macro_expand(tokens, Macro::TokenSourceOption::use_supplied, Macro::DefinedHandlingOption::process);
tokens = macro_expand(tokens, Macro::TokenSourceOption::use_supplied, Macro::DefinedHandlingOption::process, Macro::CalledContext::process_include);
if (DP()) {
cout << "Replaced after macro :\n";
copy(tokens.begin(), tokens.end(), ostream_iterator<Ptoken>(cout));
Expand Down
13 changes: 13 additions & 0 deletions src/ptoken.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,19 @@ Ptoken::Ptoken(const Ctoken &t) : Token(t)
{
}

/*
* Set the is_cpp_str_val attribute for the macros that were
* expanded to yield the stringized or pasted token.
* Conveniently, these macros are part of the token's hide set.
*/
void
Ptoken::set_cpp_str_val() const
{
for (auto &tok : hideset)
tok.set_ec_attribute(is_cpp_str_val);
}


#ifdef UNIT_TEST
// cl -GX -DWIN32 -c eclass.cpp fileid.cpp tokid.cpp tokname.cpp token.cpp
// cl -GX -DWIN32 -DUNIT_TEST ptoken.cpp token.obj tokid.obj eclass.obj tokname.obj fileid.obj kernel32.lib
Expand Down
5 changes: 5 additions & 0 deletions src/ptoken.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,11 @@ class Ptoken : public Token {
inline void hideset_insert(Token t) { hideset.insert(t); }
inline void hideset_insert(HideSet::const_iterator b, HideSet::const_iterator e) { hideset.insert(b, e); }
inline const HideSet& get_hideset() const { return (hideset); }
/*
* Set the is_cpp_str_val attribute for the macros that were
* expanded to yield the stringized or pasted token.
*/
void set_cpp_str_val() const;
// Print it (for debugging)
friend ostream& operator<<(ostream& o,const Ptoken &t);
inline friend bool operator ==(const Ptoken& a, const Ptoken& b);
Expand Down
121 changes: 121 additions & 0 deletions src/test/c/c54-macro-c-const.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
/*
* Test cases for determining which object-like macros can be converted
* into C constants (constant variables or enum values).
* Macros that are unsuitable include:
* - Macros whose value is directly or indirectly used for token pasting
* or stringization.
* - Macros whose value is not a compile-time constant.
* - Macros appearing in #if, #ifdef directives or in defined().
* Note: Macros whose value is used for declaring or defining arrays can
* only be converted into enum values rather than constant variables.
*/

// Helper value; the INDIRECTLY_EXPANDED_* macros below expand to it.
#define TWELVE 12

// Can be converted to enum value, because the value is directly used in C code.
#define SIMPLE_EXPANDED_MACRO 12

// Can be converted to enum value, because the value is expanded in a macro
// and then only used in C code.
#define SIMPLE_MACRO_EXPANDED_MACRO_DIRECT 12
#define SIMPLE_MACRO_EXPANDED_MACRO_AS_ARG 12

// Can be converted to enum value, because only the name is used.
#define UNEXPANDED_MACRO_NAME 12

// Can be converted to an enum value because it is unused
#define UNUSED_MACRO

// Cannot be converted to an enum value, because the macro's value
// is directly stringized or pasted.
#define EXPANDED_PASTED_MACRO 12
#define EXPANDED_STRINGIZED_MACRO 12

// Cannot be converted to an enum value, because the macro's value
// is indirectly stringized or pasted.
#define INDIRECTLY_EXPANDED_PASTED_MACRO TWELVE
#define INDIRECTLY_EXPANDED_STRINGIZED_MACRO TWELVE

// Cannot be converted because it appears in preprocessor conditionals
#define APPEARS_IN_IF 1
#define APPEARS_IN_DEFINED 1
#define APPEARS_IN_IFDEF 1

// Cannot be converted because it appears indirectly in preprocessor conditional
#define INDIRECTLY_APPEARS_IN_IF 1

// Evaluating INDIRECT_IF in the #if below expands INDIRECTLY_APPEARS_IN_IF
#define INDIRECT_IF INDIRECTLY_APPEARS_IN_IF

// Direct use in an #if expression
#if APPEARS_IN_IF
#endif

// Use as the operand of defined()
#if defined(APPEARS_IN_DEFINED)
#endif

// Use as the operand of #ifdef
#ifdef APPEARS_IN_IFDEF
#endif

// Indirect use: reached through the expansion of INDIRECT_IF
#if INDIRECT_IF
#endif

// Must have the is_cpp_str_val attribute set
#define STRINGIZED_NUMBER 123
#define STRINGIZED_NUMBER_2 123

// Must not have the is_cpp_str_val attribute set
#define NON_STRINGIZED_NUMBER 2

// Stringize the expansion of x and index the resulting string with y
#define GET_3(x, y) STRINGIZE(x)[y]

// Expands to its argument
#define IDENTITY(x) x
// Like GET_3, but with the stringization nested inside IDENTITY invocations
#define GET_3_SANDWITCH(x, y) IDENTITY(IDENTITY(STRINGIZE(x))[y])

// A C enum constant, used in main() alongside the macros for comparison
enum { enum_value_name = 12 };

#define MACRO_ARG_EXPANSION(x) x
#define EXPANDED_MACRO SIMPLE_MACRO_EXPANDED_MACRO_DIRECT

// Operands of ## and # are not macro-expanded before pasting or stringizing
#define CONCAT(base, ext) base ## ext
#define STRINGIZE(x) #x

// The extra macro level makes the arguments get expanded first
#define EXPAND_AND_STRINGIZE(x) STRINGIZE(x)
#define EXPAND_AND_CONCAT(a, b) CONCAT(a, b)

// Declared by hand; this file includes no headers
int printf(const char *fmt, ...);

// Object-like macro standing for a function name
#define macro_printf printf

// Use the macros defined above in pasting, stringizing, and plain C
// contexts. The identifier assigned to after each pasted declaration
// shows the name that the pasting is expected to produce.
int
main()
{
// CONCAT pastes its arguments without expanding them, so the name of
// the macro (or enum constant) becomes part of the identifier.
int CONCAT(foo_, UNEXPANDED_MACRO_NAME);
foo_UNEXPANDED_MACRO_NAME = 0;

int CONCAT(foo_, enum_value_name);
foo_enum_value_name = 12;

// EXPAND_AND_CONCAT expands its arguments before pasting, so the
// macro's value (12) becomes part of the identifier.
int EXPAND_AND_CONCAT(bar_, EXPANDED_PASTED_MACRO);
bar_12 = 0;

int EXPAND_AND_CONCAT(foobar_, INDIRECTLY_EXPANDED_PASTED_MACRO);
foobar_12 = 0;

// An enum constant is not replaced by the preprocessor, so its name is pasted.
int EXPAND_AND_CONCAT(bar_, enum_value_name);
bar_enum_value_name = 0; // Incorrect; with macro would be bar_12

// Values that reach C code only as ordinary expressions
printf("expanded macro value: %d\n", SIMPLE_EXPANDED_MACRO);
printf("expanded macro value: %d\n", MACRO_ARG_EXPANSION(SIMPLE_MACRO_EXPANDED_MACRO_AS_ARG));
printf("expanded macro value: %d\n", EXPANDED_MACRO);
printf("enum value: %d\n", enum_value_name);

// STRINGIZE does not expand its argument: only the name is stringized
printf("unexpanded macro name: %s\n", STRINGIZE(UNEXPANDED_MACRO_NAME));
printf("enum value name: %s (correct)\n", STRINGIZE(enum_value_name));

// EXPAND_AND_STRINGIZE expands its argument first: a macro's value is stringized
printf("expanded stringized macro: %s\n", EXPAND_AND_STRINGIZE(EXPANDED_STRINGIZED_MACRO));
printf("indirectly expanded stringized macro: %s\n", EXPAND_AND_STRINGIZE(INDIRECTLY_EXPANDED_STRINGIZED_MACRO));
macro_printf("enum value: %s (incorrect)\n", EXPAND_AND_STRINGIZE(enum_value_name));

// The first argument's value is stringized; the second is used only as an index
char c = GET_3(STRINGIZED_NUMBER, NON_STRINGIZED_NUMBER);
char c2 = GET_3_SANDWITCH(IDENTITY(STRINGIZED_NUMBER_2), NON_STRINGIZED_NUMBER);
return 0;
}
Loading

0 comments on commit 5bb6d05

Please sign in to comment.